use builtin_thread_pointer only on non-apple arm64 (issue #883 and #851)

This commit is contained in:
Daan 2024-04-26 23:34:10 -07:00
parent 146f9d2333
commit cc3c14f2ed

View File

@ -203,16 +203,14 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
#endif
// Do we have __builtin_thread_pointer? (do not make this a compound test as that fails on older gcc's, see issue #851)
#ifdef __has_builtin
#if __has_builtin(__builtin_thread_pointer)
#if (!defined(__APPLE__)) && /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
(!defined(__clang_major__) || __clang_major__ >= 14) // older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>)
#define MI_HAS_BUILTIN_THREAD_POINTER 1
#endif
#endif
#elif defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) // special case aarch64 for older gcc versions (issue #851)
#define MI_HAS_BUILTIN_THREAD_POINTER 1
// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id
// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883)
// Nevertheless, it seems needed on older graviton platforms (see issue #851).
// For now, we only enable this for specific platforms.
#if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \
&& !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \
&& (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>) */
#define MI_USE_BUILTIN_THREAD_POINTER 1
#endif
@ -234,6 +232,13 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
return (uintptr_t)NtCurrentTeb();
}
#elif MI_USE_BUILTIN_THREAD_POINTER
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms with recent compilers
return (uintptr_t)__builtin_thread_pointer();
}
#elif defined(MI_HAS_TLS_SLOT)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
@ -249,13 +254,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#endif
}
#elif MI_HAS_BUILTIN_THREAD_POINTER
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms
return (uintptr_t)__builtin_thread_pointer();
}
#else
// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).