prefer using __builtin_thread_pointer over assembly primitives. Fixes #851 and #852 as well.

This commit is contained in:
Daan Leijen 2024-03-02 11:50:57 -08:00
parent 8f353d8005
commit 3966953b7f
2 changed files with 47 additions and 28 deletions

View File

@ -125,26 +125,14 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// for each thread (unequal to zero).
//-------------------------------------------------------------------
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot.
// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
#elif defined(__GNUC__) && ( \
//
// Note: on most platforms this is not actually used anymore as we prefer `__builtin_thread_pointer()` nowadays.
// However, we do still use it with older clang compilers and Apple OS (as we use TLS slot for the default heap there).
#if defined(__GNUC__) && ( \
(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
@ -152,6 +140,8 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
)
#define MI_HAS_TLS_SLOT
static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
void* res;
const size_t ofs = (slot*sizeof(void*));
@ -205,6 +195,33 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
#endif
}
#endif
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif defined(__has_builtin) && __has_builtin(__builtin_thread_pointer) && \
(!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms
return (uintptr_t)__builtin_thread_pointer();
}
#elif defined(MI_HAS_TLS_SLOT)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
@ -251,7 +268,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT 89 // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
@ -269,6 +285,9 @@ static inline mi_heap_t* mi_prim_get_default_heap(void);
#if defined(MI_TLS_SLOT)
# if !defined(MI_HAS_TLS_SLOT)
# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined
# endif
static inline mi_heap_t* mi_prim_get_default_heap(void) {
mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);