prefer using __builtin_thread_pointer over assembly primitives. Fixes #851 and #852 as well.

This commit is contained in:
Daan Leijen 2024-03-02 11:50:57 -08:00
parent 8f353d8005
commit 3966953b7f
2 changed files with 47 additions and 28 deletions

View File

@ -128,7 +128,7 @@ endif()
if(MI_SECURE)
message(STATUS "Set full secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=4)
list(APPEND mi_defines MI_SECURE=4)
endif()
if(MI_TRACK_VALGRIND)
@ -468,7 +468,7 @@ if (MI_BUILD_OBJECT)
set(mimalloc-obj-static "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/mimalloc-obj.dir/src/static.c${CMAKE_C_OUTPUT_EXTENSION}")
set(mimalloc-obj-out "${CMAKE_CURRENT_BINARY_DIR}/${mi_basename}${CMAKE_C_OUTPUT_EXTENSION}")
add_custom_command(OUTPUT ${mimalloc-obj-out} DEPENDS mimalloc-obj COMMAND "${CMAKE_COMMAND}" -E copy "${mimalloc-obj-static}" "${mimalloc-obj-out}")
add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out})
add_custom_target(mimalloc-obj-target ALL DEPENDS ${mimalloc-obj-out})
endif()
# the following seems to lead to cmake warnings/errors on some systems, disable for now :-(

View File

@ -35,10 +35,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config );
// Free OS memory
int _mi_prim_free(void* addr, size_t size );
// Allocate OS memory. Return NULL on error.
// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// If `commit` is false, the virtual memory range only needs to be reserved (with no access)
// which will later be committed explicitly using `_mi_prim_commit`.
// `is_zero` is set to true if the memory was zero initialized (as on most OS's)
// pre: !commit => !allow_large
@ -82,11 +82,11 @@ mi_msecs_t _mi_prim_clock_now(void);
typedef struct mi_process_info_s {
mi_msecs_t elapsed;
mi_msecs_t utime;
mi_msecs_t stime;
size_t current_rss;
size_t peak_rss;
mi_msecs_t stime;
size_t current_rss;
size_t peak_rss;
size_t current_commit;
size_t peak_commit;
size_t peak_commit;
size_t page_faults;
} mi_process_info_t;
@ -117,7 +117,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
//-------------------------------------------------------------------
// Thread id: `_mi_prim_thread_id()`
//
//
// Getting the thread id should be performant as it is called in the
// fast path of `_mi_free` and we specialize for various platforms as
// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
@ -125,26 +125,14 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// for each thread (unequal to zero).
//-------------------------------------------------------------------
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot.
// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
#elif defined(__GNUC__) && ( \
//
// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays.
// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there).
#if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
@ -152,6 +140,8 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
)
#define MI_HAS_TLS_SLOT
static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
void* res;
const size_t ofs = (slot*sizeof(void*));
@ -205,6 +195,33 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
#endif
}
#endif
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif defined(__has_builtin) && __has_builtin(__builtin_thread_pointer) && \
(!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms
return (uintptr_t)__builtin_thread_pointer();
}
#elif defined(MI_HAS_TLS_SLOT)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
@ -251,7 +268,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
@ -269,6 +285,9 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_TLS_SLOT)
# if !defined(MI_HAS_TLS_SLOT)
# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined
# endif
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);