merge from dev-atomic with new atomic interface
This commit is contained in:
commit
03071dec0f
@ -125,6 +125,7 @@ endif()
|
||||
if(MI_DEBUG_TSAN MATCHES "ON")
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||
message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)")
|
||||
list(APPEND mi_defines MI_TSAN=1)
|
||||
list(APPEND mi_cflags -fsanitize=thread -g -O1)
|
||||
list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread)
|
||||
else()
|
||||
@ -229,6 +230,11 @@ endif()
|
||||
message(STATUS "")
|
||||
message(STATUS "Library base name: ${mi_basename}")
|
||||
message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}")
|
||||
if(MI_USE_CXX MATCHES "ON")
|
||||
message(STATUS "Compiler : ${CMAKE_CXX_COMPILER}")
|
||||
else()
|
||||
message(STATUS "Compiler : ${CMAKE_C_COMPILER}")
|
||||
endif()
|
||||
message(STATUS "Install directory: ${mi_install_dir}")
|
||||
message(STATUS "Build targets : ${mi_build_targets}")
|
||||
message(STATUS "")
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2018, Microsoft Research, Daan Leijen
|
||||
Copyright (c) 2018,2020 Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
@ -8,127 +8,99 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#ifndef MIMALLOC_ATOMIC_H
|
||||
#define MIMALLOC_ATOMIC_H
|
||||
|
||||
// ------------------------------------------------------
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Atomics
|
||||
// We need to be portable between C, C++, and MSVC.
|
||||
// ------------------------------------------------------
|
||||
// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode.
|
||||
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
|
||||
// To gain better insight in the range of used atomics, we use explicitly named memory order operations
|
||||
// instead of passing the memory order as a parameter.
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define _Atomic(tp) tp
|
||||
#define ATOMIC_VAR_INIT(x) x
|
||||
#elif defined(__cplusplus)
|
||||
#if defined(__cplusplus)
|
||||
// Use C++ atomics
|
||||
#include <atomic>
|
||||
#define _Atomic(tp) std::atomic<tp>
|
||||
#define _Atomic(tp) std::atomic<tp>
|
||||
#define mi_atomic(name) std::atomic_##name
|
||||
#define mi_memory_order(name) std::memory_order_##name
|
||||
#elif defined(_MSC_VER)
|
||||
// Use MSVC C wrapper for C11 atomics
|
||||
#define _Atomic(tp) tp
|
||||
#define ATOMIC_VAR_INIT(x) x
|
||||
#define mi_atomic(name) mi_atomic_##name
|
||||
#define mi_memory_order(name) mi_memory_order_##name
|
||||
#else
|
||||
// Use C11 atomics
|
||||
#include <stdatomic.h>
|
||||
#define mi_atomic(name) atomic_##name
|
||||
#define mi_memory_order(name) memory_order_##name
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Atomic operations specialized for mimalloc
|
||||
// ------------------------------------------------------
|
||||
// Various defines for all used memory orders in mimalloc
|
||||
#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
|
||||
mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
|
||||
|
||||
// Atomically add a value; returns the previous value. Memory ordering is relaxed.
|
||||
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add);
|
||||
#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
|
||||
mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)
|
||||
|
||||
// Atomically "and" a value; returns the previous value. Memory ordering is relaxed.
|
||||
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x);
|
||||
#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
|
||||
#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
|
||||
#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
|
||||
#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
|
||||
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
|
||||
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
|
||||
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
|
||||
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
|
||||
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
|
||||
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
|
||||
|
||||
// Atomically "or" a value; returns the previous value. Memory ordering is relaxed.
|
||||
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x);
|
||||
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
|
||||
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
|
||||
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
|
||||
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
|
||||
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
|
||||
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
|
||||
|
||||
// Atomically compare and exchange a value; returns `true` if successful.
|
||||
// May fail spuriously. Memory ordering as release on success, and relaxed on failure.
|
||||
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
|
||||
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
|
||||
#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,1)
|
||||
#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,1)
|
||||
#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,1)
|
||||
#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,1)
|
||||
|
||||
// Atomically compare and exchange a value; returns `true` if successful.
|
||||
// Memory ordering is acquire-release
|
||||
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
|
||||
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
|
||||
|
||||
// Atomically exchange a value. Memory ordering is acquire-release.
|
||||
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange);
|
||||
|
||||
// Atomically read a value. Memory ordering is relaxed.
|
||||
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p);
|
||||
|
||||
// Atomically read a value. Memory ordering is acquire.
|
||||
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p);
|
||||
|
||||
// Atomically write a value. Memory ordering is release.
|
||||
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x);
|
||||
|
||||
// Yield
|
||||
static inline void mi_atomic_yield(void);
|
||||
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add);
|
||||
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub);
|
||||
|
||||
// Atomically add a 64-bit value; returns the previous value.
|
||||
// Note: not using _Atomic(int64_t) as it is only used for statistics.
|
||||
static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add);
|
||||
|
||||
// Atomically update `*p` with the maximum of `*p` and `x` as a 64-bit value.
|
||||
// Returns the previous value. Note: not using _Atomic(int64_t) as it is only used for statistics.
|
||||
static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x);
|
||||
#if defined(__cplusplus) || !defined(_MSC_VER)
|
||||
|
||||
// Atomically read a 64-bit value
|
||||
// Note: not using _Atomic(int64_t) as it is only used for statistics.
|
||||
static inline int64_t mi_atomic_readi64(volatile int64_t* p);
|
||||
// In C++/C11 atomics we have polymorpic atomics so can use the typed `ptr` variants
|
||||
// (where `tp` is the type of atomic value)
|
||||
// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
|
||||
#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
|
||||
#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)
|
||||
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
|
||||
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
|
||||
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
|
||||
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
|
||||
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
|
||||
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
|
||||
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
|
||||
|
||||
// Atomically subtract a value; returns the previous value.
|
||||
static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) {
|
||||
return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub)));
|
||||
// These are used by the statistics
|
||||
static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
|
||||
return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
|
||||
}
|
||||
static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
|
||||
int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
|
||||
while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ };
|
||||
}
|
||||
|
||||
// Atomically increment a value; returns the incremented result.
|
||||
static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) {
|
||||
return mi_atomic_add(p, 1);
|
||||
}
|
||||
|
||||
// Atomically decrement a value; returns the decremented result.
|
||||
static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) {
|
||||
return mi_atomic_sub(p, 1);
|
||||
}
|
||||
#elif defined(_MSC_VER)
|
||||
|
||||
// Atomically add a signed value; returns the previous value.
|
||||
static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) {
|
||||
return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add);
|
||||
}
|
||||
|
||||
// Atomically subtract a signed value; returns the previous value.
|
||||
static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) {
|
||||
return (intptr_t)mi_atomic_addi(p,-sub);
|
||||
}
|
||||
|
||||
// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic).
|
||||
#define mi_atomic_read_ptr_relaxed(T,p) \
|
||||
(T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p)))
|
||||
|
||||
// Atomically read a pointer; Memory order is acquire.
|
||||
#define mi_atomic_read_ptr(T,p) \
|
||||
(T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p)))
|
||||
|
||||
// Atomically write a pointer; Memory order is acquire.
|
||||
#define mi_atomic_write_ptr(T,p,x) \
|
||||
mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x))
|
||||
|
||||
// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously.
|
||||
// Memory order is release. (like a write)
|
||||
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
|
||||
#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \
|
||||
mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))
|
||||
|
||||
// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release.
|
||||
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
|
||||
#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \
|
||||
mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))
|
||||
|
||||
// Atomically exchange a pointer value.
|
||||
#define mi_atomic_exchange_ptr(T,p,exchange) \
|
||||
(T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange))
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
#include <intrin.h>
|
||||
#ifdef _WIN64
|
||||
typedef LONG64 msc_intptr_t;
|
||||
@ -137,43 +109,71 @@ typedef LONG64 msc_intptr_t;
|
||||
typedef LONG msc_intptr_t;
|
||||
#define MI_64(f) f
|
||||
#endif
|
||||
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) {
|
||||
|
||||
typedef enum mi_memory_order_e {
|
||||
mi_memory_order_relaxed,
|
||||
mi_memory_order_consume,
|
||||
mi_memory_order_acquire,
|
||||
mi_memory_order_release,
|
||||
mi_memory_order_acq_rel,
|
||||
mi_memory_order_seq_cst
|
||||
} mi_memory_order;
|
||||
|
||||
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)* p, uintptr_t add, mi_memory_order mo) {
|
||||
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
|
||||
return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
|
||||
}
|
||||
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) {
|
||||
return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) {
|
||||
return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
|
||||
}
|
||||
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
|
||||
return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
|
||||
static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
|
||||
uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
|
||||
if (read == *expected) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
*expected = read;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
|
||||
return mi_atomic_cas_strong(p,desired,expected);
|
||||
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
|
||||
return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
|
||||
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)* p, uintptr_t exchange, mi_memory_order mo) {
|
||||
return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
|
||||
return *p;
|
||||
static inline mi_atomic_thread_fence(mi_memory_order mo) {
|
||||
_Atomic(uintptr_t)x = 0;
|
||||
mi_atomic_exchange_explicit(&x, 1, mo);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
|
||||
static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
return *p;
|
||||
#else
|
||||
uintptr_t x = *p;
|
||||
if (mo > mi_memory_order_relaxed) {
|
||||
while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
|
||||
}
|
||||
return x;
|
||||
#endif
|
||||
}
|
||||
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) {
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
*p = x;
|
||||
#else
|
||||
mi_atomic_exchange(p,x);
|
||||
mi_atomic_exchange_explicit(p,x,mo);
|
||||
#endif
|
||||
}
|
||||
static inline void mi_atomic_yield(void) {
|
||||
YieldProcessor();
|
||||
}
|
||||
static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) {
|
||||
|
||||
// These are used by the statistics
|
||||
static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) {
|
||||
#ifdef _WIN64
|
||||
mi_atomic_addi(p,add);
|
||||
return (int64_t)mi_atomic_addi((int64_t*)p,add);
|
||||
#else
|
||||
int64_t current;
|
||||
int64_t sum;
|
||||
@ -181,91 +181,52 @@ static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) {
|
||||
current = *p;
|
||||
sum = current + add;
|
||||
} while (_InterlockedCompareExchange64(p, sum, current) != current);
|
||||
return current;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void mi_atomic_maxi64(volatile _Atomic(int64_t)*p, int64_t x) {
|
||||
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
|
||||
int64_t current;
|
||||
do {
|
||||
current = *p;
|
||||
} while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
|
||||
}
|
||||
|
||||
static inline int64_t mi_atomic_readi64(volatile _Atomic(int64_t)*p) {
|
||||
#ifdef _WIN64
|
||||
return *p;
|
||||
#else
|
||||
int64_t current;
|
||||
do {
|
||||
current = *p;
|
||||
} while (_InterlockedCompareExchange64(p, current, current) != current);
|
||||
return current;
|
||||
#endif
|
||||
}
|
||||
// The pointer macros cast to `uintptr_t`.
|
||||
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
|
||||
#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
|
||||
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
|
||||
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
|
||||
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
|
||||
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
|
||||
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
|
||||
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
|
||||
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
|
||||
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
#define MI_USING_STD using namespace std;
|
||||
#else
|
||||
#define MI_USING_STD
|
||||
#endif
|
||||
static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) {
|
||||
MI_USING_STD
|
||||
return atomic_fetch_add_explicit(p, add, memory_order_relaxed);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
MI_USING_STD
|
||||
return atomic_fetch_and_explicit(p, x, memory_order_acq_rel);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
MI_USING_STD
|
||||
return atomic_fetch_or_explicit(p, x, memory_order_acq_rel);
|
||||
}
|
||||
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
|
||||
MI_USING_STD
|
||||
return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire);
|
||||
}
|
||||
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
|
||||
MI_USING_STD
|
||||
return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_acquire);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
|
||||
MI_USING_STD
|
||||
return atomic_exchange_explicit(p, exchange, memory_order_acq_rel);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) {
|
||||
MI_USING_STD
|
||||
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) {
|
||||
MI_USING_STD
|
||||
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire);
|
||||
}
|
||||
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
MI_USING_STD
|
||||
return atomic_store_explicit(p, x, memory_order_release);
|
||||
}
|
||||
static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) {
|
||||
MI_USING_STD
|
||||
atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
|
||||
}
|
||||
static inline int64_t mi_atomic_readi64(volatile int64_t* p) {
|
||||
MI_USING_STD
|
||||
return atomic_load_explicit((volatile _Atomic(int64_t)*) p, memory_order_relaxed);
|
||||
}
|
||||
static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x) {
|
||||
MI_USING_STD
|
||||
int64_t current;
|
||||
do {
|
||||
current = mi_atomic_readi64(p);
|
||||
} while (current < x && !atomic_compare_exchange_weak_explicit((volatile _Atomic(int64_t)*)p, ¤t, x, memory_order_acq_rel, memory_order_relaxed));
|
||||
|
||||
|
||||
// Atomically add a signed value; returns the previous value.
|
||||
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
|
||||
return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
|
||||
}
|
||||
|
||||
// Atomically subtract a signed value; returns the previous value.
|
||||
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
|
||||
return (intptr_t)mi_atomic_addi(p, -sub);
|
||||
}
|
||||
|
||||
// Yield
|
||||
#if defined(__cplusplus)
|
||||
#include <thread>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
#include <thread>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
std::this_thread::yield();
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
YieldProcessor();
|
||||
}
|
||||
#elif (defined(__GNUC__) || defined(__clang__)) && \
|
||||
(defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
@ -278,17 +239,16 @@ static inline void mi_atomic_maxi64(volatile int64_t* p, int64_t x) {
|
||||
}
|
||||
#endif
|
||||
#elif defined(__wasi__)
|
||||
#include <sched.h>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
sched_yield();
|
||||
}
|
||||
#include <sched.h>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
sched_yield();
|
||||
}
|
||||
#else
|
||||
#include <unistd.h>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
sleep(0);
|
||||
}
|
||||
#include <unistd.h>
|
||||
static inline void mi_atomic_yield(void) {
|
||||
sleep(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif // __MIMALLOC_ATOMIC_H
|
||||
|
@ -467,21 +467,21 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
|
||||
|
||||
// Thread free access
|
||||
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
|
||||
return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3);
|
||||
return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
|
||||
}
|
||||
|
||||
static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
|
||||
return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3);
|
||||
return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
|
||||
}
|
||||
|
||||
// Heap access
|
||||
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
|
||||
return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap));
|
||||
return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
|
||||
}
|
||||
|
||||
static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
|
||||
mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
|
||||
mi_atomic_write(&page->xheap,(uintptr_t)heap);
|
||||
mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
|
||||
}
|
||||
|
||||
// Thread free flag helpers
|
||||
|
@ -14,7 +14,9 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
// Minimal alignment necessary. On most platforms 16 bytes are needed
|
||||
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
|
||||
#ifndef MI_MAX_ALIGN_SIZE
|
||||
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Variants
|
||||
@ -160,6 +162,7 @@ typedef enum mi_delayed_e {
|
||||
|
||||
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
|
||||
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
|
||||
#if !MI_TSAN
|
||||
typedef union mi_page_flags_s {
|
||||
uint8_t full_aligned;
|
||||
struct {
|
||||
@ -167,6 +170,16 @@ typedef union mi_page_flags_s {
|
||||
uint8_t has_aligned : 1;
|
||||
} x;
|
||||
} mi_page_flags_t;
|
||||
#else
|
||||
// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
|
||||
typedef union mi_page_flags_s {
|
||||
uint16_t full_aligned;
|
||||
struct {
|
||||
uint8_t in_full;
|
||||
uint8_t has_aligned;
|
||||
} x;
|
||||
} mi_page_flags_t;
|
||||
#endif
|
||||
|
||||
// Thread free list.
|
||||
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
|
||||
@ -226,12 +239,12 @@ typedef struct mi_page_s {
|
||||
uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
|
||||
uint32_t xblock_size; // size available in each block (always `>0`)
|
||||
|
||||
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
|
||||
volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
|
||||
volatile _Atomic(uintptr_t) xheap;
|
||||
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
|
||||
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
|
||||
_Atomic(uintptr_t) xheap;
|
||||
|
||||
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
|
||||
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
|
||||
|
||||
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
|
||||
#if MI_INTPTR_SIZE==8
|
||||
@ -277,10 +290,11 @@ typedef struct mi_segment_s {
|
||||
uintptr_t decommit_mask;
|
||||
uintptr_t commit_mask;
|
||||
|
||||
// from here is zero initialized
|
||||
struct mi_segment_s* next; // the list of freed segments in the cache
|
||||
struct mi_segment_s* abandoned_next;
|
||||
_Atomic(struct mi_segment_s*) abandoned_next;
|
||||
|
||||
// from here is zero initialized
|
||||
struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
|
||||
|
||||
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
|
||||
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long)
|
||||
size_t used; // count of pages in use
|
||||
@ -291,7 +305,7 @@ typedef struct mi_segment_s {
|
||||
|
||||
// layout like this to optimize access in `mi_free`
|
||||
mi_segment_kind_t kind;
|
||||
volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment
|
||||
_Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment
|
||||
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
|
||||
mi_slice_t slices[MI_SLICES_PER_SEGMENT];
|
||||
} mi_segment_t;
|
||||
@ -351,7 +365,7 @@ struct mi_heap_s {
|
||||
mi_tld_t* tld;
|
||||
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
|
||||
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
|
||||
volatile _Atomic(mi_block_t*) thread_delayed_free;
|
||||
_Atomic(mi_block_t*) thread_delayed_free;
|
||||
uintptr_t thread_id; // thread this heap belongs too
|
||||
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
|
||||
uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
|
||||
|
@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#ifndef MIMALLOC_H
|
||||
#define MIMALLOC_H
|
||||
|
||||
#define MI_MALLOC_VERSION 164 // major + 2 digits minor
|
||||
#define MI_MALLOC_VERSION 166 // major + 2 digits minor
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Compiler specific attributes
|
||||
@ -192,7 +192,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_
|
||||
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
|
||||
|
||||
mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
|
||||
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);;
|
||||
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
|
||||
mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
|
||||
|
||||
mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc;
|
||||
@ -256,7 +256,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
|
||||
|
||||
// Experimental
|
||||
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
|
||||
mi_decl_nodiscard mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
|
||||
mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
|
||||
|
||||
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
|
||||
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
|
||||
|
23
src/alloc.c
23
src/alloc.c
@ -305,11 +305,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
|
||||
}
|
||||
|
||||
// Try to put the block on either the page-local thread free list, or the heap delayed free list.
|
||||
mi_thread_free_t tfree;
|
||||
mi_thread_free_t tfreex;
|
||||
bool use_delayed;
|
||||
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
|
||||
do {
|
||||
tfree = mi_atomic_read_relaxed(&page->xthread_free);
|
||||
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
|
||||
if (mi_unlikely(use_delayed)) {
|
||||
// unlikely: this only happens on the first concurrent free in a page that is in the full list
|
||||
@ -320,27 +319,27 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
|
||||
mi_block_set_next(page, block, mi_tf_block(tfree));
|
||||
tfreex = mi_tf_set_block(tfree,block);
|
||||
}
|
||||
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
|
||||
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
|
||||
|
||||
if (mi_unlikely(use_delayed)) {
|
||||
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
|
||||
mi_heap_t* const heap = mi_page_heap(page);
|
||||
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
|
||||
mi_assert_internal(heap != NULL);
|
||||
if (heap != NULL) {
|
||||
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
|
||||
mi_block_t* dfree;
|
||||
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
|
||||
do {
|
||||
dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
|
||||
mi_block_set_nextx(heap,block,dfree, heap->keys);
|
||||
} while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree));
|
||||
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
|
||||
}
|
||||
|
||||
// and reset the MI_DELAYED_FREEING flag
|
||||
tfree = mi_atomic_load_relaxed(&page->xthread_free);
|
||||
do {
|
||||
tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free);
|
||||
tfreex = tfree;
|
||||
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
|
||||
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
|
||||
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
|
||||
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
|
||||
}
|
||||
}
|
||||
|
||||
@ -684,7 +683,7 @@ mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept {
|
||||
#ifndef PATH_MAX
|
||||
#define PATH_MAX MAX_PATH
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
|
||||
// todo: use GetFullPathNameW to allow longer file names
|
||||
char buf[PATH_MAX];
|
||||
@ -765,12 +764,12 @@ typedef void (*std_new_handler_t)();
|
||||
std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv() {
|
||||
return NULL;
|
||||
}
|
||||
std_new_handler_t mi_get_new_handler() {
|
||||
static std_new_handler_t mi_get_new_handler() {
|
||||
return _ZSt15get_new_handlerv();
|
||||
}
|
||||
#else
|
||||
// note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`.
|
||||
std_new_handler_t mi_get_new_handler() {
|
||||
static std_new_handler_t mi_get_new_handler() {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
20
src/arena.c
20
src/arena.c
@ -69,7 +69,7 @@ typedef struct mi_arena_s {
|
||||
bool is_zero_init; // is the arena zero initialized?
|
||||
bool is_committed; // is the memory committed
|
||||
bool is_large; // large OS page allocated
|
||||
volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
|
||||
_Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
|
||||
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
|
||||
mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed?
|
||||
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
|
||||
@ -111,12 +111,12 @@ static size_t mi_block_count_of_size(size_t size) {
|
||||
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
|
||||
{
|
||||
const size_t fcount = arena->field_count;
|
||||
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
|
||||
size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search
|
||||
for (size_t visited = 0; visited < fcount; visited++, idx++) {
|
||||
if (idx >= fcount) idx = 0; // wrap around
|
||||
// try to atomically claim a range of bits
|
||||
if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
|
||||
mi_atomic_write(&arena->search_idx, idx); // start search from here next time
|
||||
mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -332,7 +332,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
|
||||
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
|
||||
// try numa affine allocation
|
||||
for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
|
||||
mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena==NULL) break; // end reached
|
||||
if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
|
||||
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
|
||||
@ -344,7 +344,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
|
||||
}
|
||||
// try from another numa node instead..
|
||||
for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
|
||||
mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
|
||||
if (arena==NULL) break; // end reached
|
||||
if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
|
||||
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
|
||||
@ -394,7 +394,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
|
||||
size_t bitmap_idx;
|
||||
mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
|
||||
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
|
||||
mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]);
|
||||
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]);
|
||||
mi_assert_internal(arena != NULL);
|
||||
if (arena == NULL) {
|
||||
_mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
|
||||
@ -420,15 +420,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
|
||||
|
||||
static bool mi_arena_add(mi_arena_t* arena) {
|
||||
mi_assert_internal(arena != NULL);
|
||||
mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
|
||||
mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
|
||||
mi_assert_internal(arena->block_count > 0);
|
||||
|
||||
uintptr_t i = mi_atomic_increment(&mi_arena_count);
|
||||
uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
|
||||
if (i >= MI_MAX_ARENAS) {
|
||||
mi_atomic_decrement(&mi_arena_count);
|
||||
mi_atomic_decrement_acq_rel(&mi_arena_count);
|
||||
return false;
|
||||
}
|
||||
mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena);
|
||||
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,7 @@ and that the sequence must be smaller or equal to the bits in a field.
|
||||
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set
|
||||
|
||||
// An atomic bitmap of `uintptr_t` fields
|
||||
typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t;
|
||||
typedef _Atomic(uintptr_t) mi_bitmap_field_t;
|
||||
typedef mi_bitmap_field_t* mi_bitmap_t;
|
||||
|
||||
// A bitmap index is the index of the bit in a bitmap.
|
||||
@ -77,6 +77,14 @@ static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
|
||||
#if defined(_MSC_VER)
|
||||
#define MI_HAVE_BITSCAN
|
||||
#include <intrin.h>
|
||||
#ifndef MI_64
|
||||
#if MI_INTPTR_SIZE==8
|
||||
#define MI_64(f) f##64
|
||||
#else
|
||||
#define MI_64(f) f
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline size_t mi_bsf(uintptr_t x) {
|
||||
if (x==0) return 8*MI_INTPTR_SIZE;
|
||||
DWORD idx;
|
||||
@ -118,9 +126,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f
|
||||
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
|
||||
mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS);
|
||||
|
||||
uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]);
|
||||
uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
if ((field & mask) == 0) { // free?
|
||||
if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) {
|
||||
if (mi_atomic_cas_strong_acq_rel(&bitmap[idx], &field, (field|mask))) {
|
||||
// claimed!
|
||||
return true;
|
||||
}
|
||||
@ -134,8 +142,8 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f
|
||||
static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
|
||||
{
|
||||
mi_assert_internal(bitmap_idx != NULL);
|
||||
volatile _Atomic(uintptr_t)* field = &bitmap[idx];
|
||||
uintptr_t map = mi_atomic_read(field);
|
||||
_Atomic(uintptr_t)* field = &bitmap[idx];
|
||||
uintptr_t map = mi_atomic_load_relaxed(field);
|
||||
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
|
||||
|
||||
// search for 0-bit sequence of length count
|
||||
@ -155,9 +163,8 @@ static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx
|
||||
mi_assert_internal((m >> bitidx) == mask); // no overflow?
|
||||
const uintptr_t newmap = map | m;
|
||||
mi_assert_internal((newmap^map) >> bitidx == mask);
|
||||
if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here?
|
||||
// no success, another thread claimed concurrently.. keep going
|
||||
map = mi_atomic_read(field);
|
||||
if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here?
|
||||
// no success, another thread claimed concurrently.. keep going (with updated `map`)
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
@ -211,7 +218,7 @@ static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, s
|
||||
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
|
||||
// mi_assert_internal((bitmap[idx] & mask) == mask);
|
||||
uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask);
|
||||
uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask);
|
||||
return ((prev & mask) == mask);
|
||||
}
|
||||
|
||||
@ -224,7 +231,7 @@ static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, siz
|
||||
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
|
||||
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
|
||||
uintptr_t prev = mi_atomic_or(&bitmap[idx], mask);
|
||||
uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask);
|
||||
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
|
||||
return ((prev & mask) == 0);
|
||||
}
|
||||
@ -235,7 +242,7 @@ static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_field
|
||||
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
|
||||
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
|
||||
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
|
||||
uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]);
|
||||
uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]);
|
||||
if (any_ones != NULL) *any_ones = ((field & mask) != 0);
|
||||
return ((field & mask) == mask);
|
||||
}
|
||||
|
@ -142,7 +142,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
|
||||
|
||||
// collect all pages owned by this thread
|
||||
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
|
||||
mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
|
||||
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
|
||||
|
||||
// collect segment caches
|
||||
if (collect >= MI_FORCE) {
|
||||
|
10
src/init.c
10
src/init.c
@ -310,7 +310,7 @@ static void _mi_thread_done(mi_heap_t* default_heap);
|
||||
// nothing to do as it is done in DllMain
|
||||
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
|
||||
// use thread local storage keys to detect thread ending
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
#include <fibersapi.h>
|
||||
#if (_WIN32_WINNT < 0x600) // before Windows Vista
|
||||
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
|
||||
@ -430,11 +430,11 @@ static bool os_preloading = true; // true until this module is initialized
|
||||
static bool mi_redirected = false; // true if malloc redirects to mi_malloc
|
||||
|
||||
// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
|
||||
bool _mi_preloading() {
|
||||
bool _mi_preloading(void) {
|
||||
return os_preloading;
|
||||
}
|
||||
|
||||
bool mi_is_redirected() mi_attr_noexcept {
|
||||
bool mi_is_redirected(void) mi_attr_noexcept {
|
||||
return mi_redirected;
|
||||
}
|
||||
|
||||
@ -456,7 +456,7 @@ mi_decl_export void _mi_redirect_entry(DWORD reason) {
|
||||
}
|
||||
}
|
||||
__declspec(dllimport) bool mi_allocator_init(const char** message);
|
||||
__declspec(dllimport) void mi_allocator_done();
|
||||
__declspec(dllimport) void mi_allocator_done(void);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@ -465,7 +465,7 @@ static bool mi_allocator_init(const char** message) {
|
||||
if (message != NULL) *message = NULL;
|
||||
return true;
|
||||
}
|
||||
static void mi_allocator_done() {
|
||||
static void mi_allocator_done(void) {
|
||||
// nothing to do
|
||||
}
|
||||
#endif
|
||||
|
@ -60,7 +60,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
{ 0, UNINIT, MI_OPTION(verbose) },
|
||||
|
||||
// the following options are experimental and not all combinations make sense.
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand?
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`)
|
||||
#if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
|
||||
{ 0, UNINIT, MI_OPTION(eager_region_commit) },
|
||||
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
|
||||
@ -77,7 +77,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
||||
#if defined(__NetBSD__)
|
||||
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
||||
#else
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
||||
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
|
||||
#endif
|
||||
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit pages when not eager committed
|
||||
{ 250, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
|
||||
@ -175,11 +175,11 @@ static _Atomic(uintptr_t) out_len;
|
||||
static void mi_out_buf(const char* msg, void* arg) {
|
||||
UNUSED(arg);
|
||||
if (msg==NULL) return;
|
||||
if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
|
||||
if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
|
||||
size_t n = strlen(msg);
|
||||
if (n==0) return;
|
||||
// claim space
|
||||
uintptr_t start = mi_atomic_add(&out_len, n);
|
||||
uintptr_t start = mi_atomic_add_acq_rel(&out_len, n);
|
||||
if (start >= MI_MAX_DELAY_OUTPUT) return;
|
||||
// check bound
|
||||
if (start+n >= MI_MAX_DELAY_OUTPUT) {
|
||||
@ -191,7 +191,7 @@ static void mi_out_buf(const char* msg, void* arg) {
|
||||
static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) {
|
||||
if (out==NULL) return;
|
||||
// claim (if `no_more_buf == true`, no more output will be added after this point)
|
||||
size_t count = mi_atomic_add(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1));
|
||||
size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1));
|
||||
// and output the current contents
|
||||
if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT;
|
||||
out_buf[count] = 0;
|
||||
@ -219,17 +219,17 @@ static void mi_out_buf_stderr(const char* msg, void* arg) {
|
||||
// For now, don't register output from multiple threads.
|
||||
#pragma warning(suppress:4180)
|
||||
static mi_output_fun* volatile mi_out_default; // = NULL
|
||||
static volatile _Atomic(void*) mi_out_arg; // = NULL
|
||||
static _Atomic(void*) mi_out_arg; // = NULL
|
||||
|
||||
static mi_output_fun* mi_out_get_default(void** parg) {
|
||||
if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); }
|
||||
if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); }
|
||||
mi_output_fun* out = mi_out_default;
|
||||
return (out == NULL ? &mi_out_buf : out);
|
||||
}
|
||||
|
||||
void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept {
|
||||
mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer
|
||||
mi_atomic_write_ptr(void,&mi_out_arg, arg);
|
||||
mi_atomic_store_ptr_release(void,&mi_out_arg, arg);
|
||||
if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now
|
||||
}
|
||||
|
||||
@ -243,7 +243,7 @@ static void mi_add_stderr_output() {
|
||||
// --------------------------------------------------------
|
||||
// Messages, all end up calling `_mi_fputs`.
|
||||
// --------------------------------------------------------
|
||||
static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings
|
||||
static _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings
|
||||
|
||||
// When overriding malloc, we may recurse into mi_vfprintf if an allocation
|
||||
// inside the C runtime causes another message.
|
||||
@ -315,13 +315,13 @@ void _mi_verbose_message(const char* fmt, ...) {
|
||||
|
||||
static void mi_show_error_message(const char* fmt, va_list args) {
|
||||
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
|
||||
if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
|
||||
if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return;
|
||||
mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args);
|
||||
}
|
||||
|
||||
void _mi_warning_message(const char* fmt, ...) {
|
||||
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
|
||||
if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
|
||||
if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return;
|
||||
va_list args;
|
||||
va_start(args,fmt);
|
||||
mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args);
|
||||
@ -341,7 +341,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co
|
||||
// --------------------------------------------------------
|
||||
|
||||
static mi_error_fun* volatile mi_error_handler; // = NULL
|
||||
static volatile _Atomic(void*) mi_error_arg; // = NULL
|
||||
static _Atomic(void*) mi_error_arg; // = NULL
|
||||
|
||||
static void mi_error_default(int err) {
|
||||
UNUSED(err);
|
||||
@ -367,7 +367,7 @@ static void mi_error_default(int err) {
|
||||
|
||||
void mi_register_error(mi_error_fun* fun, void* arg) {
|
||||
mi_error_handler = fun; // can be NULL
|
||||
mi_atomic_write_ptr(void,&mi_error_arg, arg);
|
||||
mi_atomic_store_ptr_release(void,&mi_error_arg, arg);
|
||||
}
|
||||
|
||||
void _mi_error_message(int err, const char* fmt, ...) {
|
||||
@ -378,7 +378,7 @@ void _mi_error_message(int err, const char* fmt, ...) {
|
||||
va_end(args);
|
||||
// and call the error handler which may abort (or return normally)
|
||||
if (mi_error_handler != NULL) {
|
||||
mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg));
|
||||
mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg));
|
||||
}
|
||||
else {
|
||||
mi_error_default(err);
|
||||
@ -415,7 +415,7 @@ static inline int mi_strnicmp(const char* s, const char* t, size_t n) {
|
||||
// reliably even when this is invoked before the C runtime is initialized.
|
||||
// i.e. when `_mi_preloading() == true`.
|
||||
// Note: on windows, environment names are not case sensitive.
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
static bool mi_getenv(const char* name, char* result, size_t result_size) {
|
||||
result[0] = 0;
|
||||
size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
|
||||
|
41
src/os.c
41
src/os.c
@ -24,7 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
#elif defined(__wasi__)
|
||||
// stdlib.h is all we need, and has already been included in mimalloc.h
|
||||
#else
|
||||
@ -276,15 +276,15 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
|
||||
|
||||
static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
|
||||
mi_assert_internal(!(large_only && !allow_large));
|
||||
static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
|
||||
static _Atomic(uintptr_t) large_page_try_ok; // = 0;
|
||||
void* p = NULL;
|
||||
if ((large_only || use_large_os_page(size, try_alignment))
|
||||
&& allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
|
||||
uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
|
||||
uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
|
||||
if (!large_only && try_ok > 0) {
|
||||
// if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
|
||||
// therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
|
||||
mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
|
||||
mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
|
||||
}
|
||||
else {
|
||||
// large OS pages must always reserve and commit.
|
||||
@ -293,7 +293,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
|
||||
if (large_only) return p;
|
||||
// fall back to non-large page allocation on error (`p == NULL`).
|
||||
if (p == NULL) {
|
||||
mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations
|
||||
mi_atomic_store_release(&large_page_try_ok,10); // on error, don't try again for the next N allocations
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -370,14 +370,14 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
|
||||
fd = VM_MAKE_TAG(os_tag);
|
||||
#endif
|
||||
if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) {
|
||||
static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
|
||||
uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
|
||||
static _Atomic(uintptr_t) large_page_try_ok; // = 0;
|
||||
uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok);
|
||||
if (!large_only && try_ok > 0) {
|
||||
// If the OS is not configured for large OS pages, or the user does not have
|
||||
// enough permission, the `mmap` will always fail (but it might also fail for other reasons).
|
||||
// Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times
|
||||
// to avoid too many failing calls to mmap.
|
||||
mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
|
||||
mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1);
|
||||
}
|
||||
else {
|
||||
int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux
|
||||
@ -417,7 +417,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
|
||||
#endif
|
||||
if (large_only) return p;
|
||||
if (p == NULL) {
|
||||
mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations
|
||||
mi_atomic_store_release(&large_page_try_ok, 10); // on error, don't try again for the next N allocations
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -459,21 +459,22 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
|
||||
// On 64-bit systems, we can do efficient aligned allocation by using
|
||||
// the 4TiB to 30TiB area to allocate them.
|
||||
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
|
||||
static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
|
||||
static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
|
||||
|
||||
// Return a 4MiB aligned address that is probably available
|
||||
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
|
||||
if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL;
|
||||
if ((size%MI_SEGMENT_SIZE) != 0) return NULL;
|
||||
uintptr_t hint = mi_atomic_add(&aligned_base, size);
|
||||
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
|
||||
if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages)
|
||||
uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area
|
||||
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
|
||||
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
|
||||
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB
|
||||
#endif
|
||||
mi_atomic_cas_strong(&aligned_base, init, hint + size);
|
||||
hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
|
||||
uintptr_t expected = hint + size;
|
||||
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
|
||||
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
|
||||
}
|
||||
if (hint%try_alignment != 0) return NULL;
|
||||
return (void*)hint;
|
||||
@ -768,12 +769,12 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
|
||||
if (p != start) return false;
|
||||
#else
|
||||
#if defined(MADV_FREE)
|
||||
static int advice = MADV_FREE;
|
||||
int err = madvise(start, csize, advice);
|
||||
static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(MADV_FREE);
|
||||
int err = madvise(start, csize, (int)mi_atomic_load_relaxed(&advice));
|
||||
if (err != 0 && errno == EINVAL && advice == MADV_FREE) {
|
||||
// if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
|
||||
advice = MADV_DONTNEED;
|
||||
err = madvise(start, csize, advice);
|
||||
mi_atomic_store_release(&advice, MADV_DONTNEED);
|
||||
err = madvise(start, csize, MADV_DONTNEED);
|
||||
}
|
||||
#elif defined(__wasi__)
|
||||
int err = 0;
|
||||
@ -979,9 +980,9 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
|
||||
|
||||
uintptr_t start = 0;
|
||||
uintptr_t end = 0;
|
||||
uintptr_t expected;
|
||||
uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start);
|
||||
do {
|
||||
start = expected = mi_atomic_read_relaxed(&mi_huge_start);
|
||||
start = huge_start;
|
||||
if (start == 0) {
|
||||
// Initialize the start address after the 32TiB area
|
||||
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
|
||||
@ -992,7 +993,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
|
||||
}
|
||||
end = start + size;
|
||||
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
|
||||
} while (!mi_atomic_cas_strong(&mi_huge_start, end, expected));
|
||||
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
|
||||
|
||||
if (total_size != NULL) *total_size = size;
|
||||
return (uint8_t*)start;
|
||||
|
@ -261,7 +261,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
|
||||
heap->page_count--;
|
||||
page->next = NULL;
|
||||
page->prev = NULL;
|
||||
// mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
|
||||
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);
|
||||
mi_page_set_in_full(page,false);
|
||||
}
|
||||
|
||||
@ -276,7 +276,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
|
||||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
|
||||
|
||||
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
|
||||
// mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap);
|
||||
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
|
||||
page->next = queue->first;
|
||||
page->prev = NULL;
|
||||
if (queue->first != NULL) {
|
||||
@ -344,7 +344,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
|
||||
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
|
||||
// inline `mi_page_set_heap` to avoid wrong assertion during absorption;
|
||||
// in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
|
||||
mi_atomic_write(&page->xheap, (uintptr_t)heap);
|
||||
mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
|
||||
// set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
|
||||
// side effect that it spins until any DELAYED_FREEING is finished. This ensures
|
||||
// that after appending only the new heap will be used for delayed free operations.
|
||||
|
30
src/page.c
30
src/page.c
@ -123,11 +123,11 @@ bool _mi_page_is_valid(mi_page_t* page) {
|
||||
#endif
|
||||
|
||||
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
|
||||
mi_thread_free_t tfree;
|
||||
mi_thread_free_t tfreex;
|
||||
mi_delayed_t old_delay;
|
||||
mi_thread_free_t tfree;
|
||||
do {
|
||||
tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS
|
||||
tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
|
||||
tfreex = mi_tf_set_delayed(tfree, delay);
|
||||
old_delay = mi_tf_delayed(tfree);
|
||||
if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
|
||||
@ -141,7 +141,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
|
||||
break; // leave never-delayed flag set
|
||||
}
|
||||
} while ((old_delay == MI_DELAYED_FREEING) ||
|
||||
!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
|
||||
!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
@ -155,13 +155,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
|
||||
static void _mi_page_thread_free_collect(mi_page_t* page)
|
||||
{
|
||||
mi_block_t* head;
|
||||
mi_thread_free_t tfree;
|
||||
mi_thread_free_t tfreex;
|
||||
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
|
||||
do {
|
||||
tfree = mi_atomic_read_relaxed(&page->xthread_free);
|
||||
head = mi_tf_block(tfree);
|
||||
tfreex = mi_tf_set_block(tfree,NULL);
|
||||
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
|
||||
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));
|
||||
|
||||
// return if the list is empty
|
||||
if (head == NULL) return;
|
||||
@ -273,11 +272,9 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
|
||||
(put there by other threads if they deallocated in a full page)
|
||||
----------------------------------------------------------- */
|
||||
void _mi_heap_delayed_free(mi_heap_t* heap) {
|
||||
// take over the list (note: no atomic exchange is it is often NULL)
|
||||
mi_block_t* block;
|
||||
do {
|
||||
block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
|
||||
} while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block));
|
||||
// take over the list (note: no atomic exchange since it is often NULL)
|
||||
mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
|
||||
while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
|
||||
|
||||
// and free them all
|
||||
while(block != NULL) {
|
||||
@ -286,11 +283,10 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
|
||||
if (!_mi_free_delayed_block(block)) {
|
||||
// we might already start delayed freeing while another thread has not yet
|
||||
// reset the delayed_freeing flag; in that case delay it further by reinserting.
|
||||
mi_block_t* dfree;
|
||||
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
|
||||
do {
|
||||
dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
|
||||
mi_block_set_nextx(heap, block, dfree, heap->keys);
|
||||
} while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree));
|
||||
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
|
||||
}
|
||||
block = next;
|
||||
}
|
||||
@ -750,20 +746,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
|
||||
----------------------------------------------------------- */
|
||||
|
||||
static mi_deferred_free_fun* volatile deferred_free = NULL;
|
||||
static volatile _Atomic(void*) deferred_arg; // = NULL
|
||||
static _Atomic(void*) deferred_arg; // = NULL
|
||||
|
||||
void _mi_deferred_free(mi_heap_t* heap, bool force) {
|
||||
heap->tld->heartbeat++;
|
||||
if (deferred_free != NULL && !heap->tld->recurse) {
|
||||
heap->tld->recurse = true;
|
||||
deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg));
|
||||
deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg));
|
||||
heap->tld->recurse = false;
|
||||
}
|
||||
}
|
||||
|
||||
void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept {
|
||||
deferred_free = fn;
|
||||
mi_atomic_write_ptr(void,&deferred_arg, arg);
|
||||
mi_atomic_store_ptr_release(void,&deferred_arg, arg);
|
||||
}
|
||||
|
||||
|
||||
|
35
src/random.c
35
src/random.c
@ -155,27 +155,36 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
To initialize a fresh random context we rely on the OS:
|
||||
- Windows : BCryptGenRandom
|
||||
- Windows : RtlGenRandom
|
||||
- osX,bsd,wasi: arc4random_buf
|
||||
- Linux : getrandom,/dev/urandom
|
||||
If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
#if defined(_WIN32)
|
||||
/*
|
||||
// We prefer BCryptGenRandom over RtlGenRandom but it leads to a crash a when using dynamic override combined with the C++ runtime :-(
|
||||
#pragma comment (lib,"bcrypt.lib")
|
||||
#include <bcrypt.h>
|
||||
static bool os_random_buf(void* buf, size_t buf_len) {
|
||||
return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
|
||||
}
|
||||
/*
|
||||
#define SystemFunction036 NTAPI SystemFunction036
|
||||
#include <NTSecAPI.h>
|
||||
#undef SystemFunction036
|
||||
static bool os_random_buf(void* buf, size_t buf_len) {
|
||||
RtlGenRandom(buf, (ULONG)buf_len);
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
#define RtlGenRandom SystemFunction036
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
static bool os_random_buf(void* buf, size_t buf_len) {
|
||||
mi_assert_internal(buf_len >= sizeof(uintptr_t));
|
||||
memset(buf, 0, buf_len);
|
||||
RtlGenRandom(buf, (ULONG)buf_len);
|
||||
return (((uintptr_t*)buf)[0] != 0); // sanity check (but RtlGenRandom should never fail)
|
||||
}
|
||||
|
||||
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
|
||||
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
|
||||
defined(__sun) || defined(__wasi__)
|
||||
@ -200,12 +209,12 @@ static bool os_random_buf(void* buf, size_t buf_len) {
|
||||
#ifndef GRND_NONBLOCK
|
||||
#define GRND_NONBLOCK (1)
|
||||
#endif
|
||||
static volatile _Atomic(uintptr_t) no_getrandom; // = 0
|
||||
if (mi_atomic_read(&no_getrandom)==0) {
|
||||
static _Atomic(uintptr_t) no_getrandom; // = 0
|
||||
if (mi_atomic_load_acquire(&no_getrandom)==0) {
|
||||
ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
|
||||
if (ret >= 0) return (buf_len == (size_t)ret);
|
||||
if (ret != ENOSYS) return false;
|
||||
mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom
|
||||
mi_atomic_store_release(&no_getrandom,1); // don't call again, and fall back to /dev/urandom
|
||||
}
|
||||
#endif
|
||||
int flags = O_RDONLY;
|
||||
@ -234,7 +243,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach/mach_time.h>
|
||||
#else
|
||||
|
60
src/region.c
60
src/region.c
@ -86,13 +86,13 @@ typedef union mi_region_info_u {
|
||||
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
|
||||
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
|
||||
typedef struct mem_region_s {
|
||||
volatile _Atomic(uintptr_t) info; // mi_region_info_t.value
|
||||
volatile _Atomic(void*) start; // start of the memory area
|
||||
_Atomic(uintptr_t) info; // mi_region_info_t.value
|
||||
_Atomic(void*) start; // start of the memory area
|
||||
mi_bitmap_field_t in_use; // bit per in-use block
|
||||
mi_bitmap_field_t dirty; // track if non-zero per block
|
||||
mi_bitmap_field_t commit; // track if committed per block
|
||||
mi_bitmap_field_t reset; // track if reset per block
|
||||
volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
|
||||
_Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
|
||||
uintptr_t padding; // round to 8 fields
|
||||
} mem_region_t;
|
||||
|
||||
@ -100,7 +100,7 @@ typedef struct mem_region_s {
|
||||
static mem_region_t regions[MI_REGION_MAX];
|
||||
|
||||
// Allocated regions
|
||||
static volatile _Atomic(uintptr_t) regions_count; // = 0;
|
||||
static _Atomic(uintptr_t) regions_count; // = 0;
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
@ -123,9 +123,9 @@ static size_t mi_good_commit_size(size_t size) {
|
||||
// Return if a pointer points into a region reserved by us.
|
||||
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
|
||||
if (p==NULL) return false;
|
||||
size_t count = mi_atomic_read_relaxed(®ions_count);
|
||||
size_t count = mi_atomic_load_relaxed(®ions_count);
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t,®ions[i].start);
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start);
|
||||
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
|
||||
}
|
||||
return false;
|
||||
@ -133,7 +133,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
|
||||
|
||||
|
||||
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
|
||||
uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start);
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
|
||||
mi_assert_internal(start != NULL);
|
||||
return (start + (bit_idx * MI_SEGMENT_SIZE));
|
||||
}
|
||||
@ -171,7 +171,7 @@ static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_
|
||||
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
{
|
||||
// not out of regions yet?
|
||||
if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false;
|
||||
if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false;
|
||||
|
||||
// try to allocate a fresh region from the OS
|
||||
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
|
||||
@ -184,9 +184,9 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
||||
mi_assert_internal(!region_large || region_commit);
|
||||
|
||||
// claim a fresh slot
|
||||
const uintptr_t idx = mi_atomic_increment(®ions_count);
|
||||
const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count);
|
||||
if (idx >= MI_REGION_MAX) {
|
||||
mi_atomic_decrement(®ions_count);
|
||||
mi_atomic_decrement_acq_rel(®ions_count);
|
||||
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
|
||||
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
|
||||
return false;
|
||||
@ -195,13 +195,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
||||
// allocated, initialize and claim the initial blocks
|
||||
mem_region_t* r = ®ions[idx];
|
||||
r->arena_memid = arena_memid;
|
||||
mi_atomic_write(&r->in_use, 0);
|
||||
mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
|
||||
mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
|
||||
mi_atomic_write(&r->reset, 0);
|
||||
mi_atomic_store_release(&r->in_use, 0);
|
||||
mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
|
||||
mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
|
||||
mi_atomic_store_release(&r->reset, 0);
|
||||
*bit_idx = 0;
|
||||
mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
|
||||
mi_atomic_write_ptr(uint8_t*,&r->start, start);
|
||||
mi_atomic_store_ptr_release(uint8_t*,&r->start, start);
|
||||
|
||||
// and share it
|
||||
mi_region_info_t info;
|
||||
@ -209,7 +209,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
||||
info.x.valid = true;
|
||||
info.x.is_large = region_large;
|
||||
info.x.numa_node = (short)_mi_os_numa_node(tld);
|
||||
mi_atomic_write(&r->info, info.value); // now make it available to others
|
||||
mi_atomic_store_release(&r->info, info.value); // now make it available to others
|
||||
*region = r;
|
||||
return true;
|
||||
}
|
||||
@ -221,7 +221,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
||||
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
|
||||
// initialized at all?
|
||||
mi_region_info_t info;
|
||||
info.value = mi_atomic_read_relaxed(®ion->info);
|
||||
info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info);
|
||||
if (info.value==0) return false;
|
||||
|
||||
// numa correct
|
||||
@ -240,7 +240,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
|
||||
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
{
|
||||
// try all regions for a free slot
|
||||
const size_t count = mi_atomic_read(®ions_count);
|
||||
const size_t count = mi_atomic_load_acquire(®ions_count);
|
||||
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
|
||||
for (size_t visited = 0; visited < count; visited++, idx++) {
|
||||
if (idx >= count) idx = 0; // wrap around
|
||||
@ -280,8 +280,8 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
|
||||
mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx));
|
||||
|
||||
mi_region_info_t info;
|
||||
info.value = mi_atomic_read(®ion->info);
|
||||
uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start);
|
||||
info.value = mi_atomic_load_acquire(®ion->info);
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start);
|
||||
mi_assert_internal(!(info.x.is_large && !*is_large));
|
||||
mi_assert_internal(start != NULL);
|
||||
|
||||
@ -400,7 +400,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
|
||||
const size_t blocks = mi_region_block_count(size);
|
||||
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
|
||||
mi_region_info_t info;
|
||||
info.value = mi_atomic_read(®ion->info);
|
||||
info.value = mi_atomic_load_acquire(®ion->info);
|
||||
mi_assert_internal(info.value != 0);
|
||||
void* blocks_start = mi_region_blocks_start(region, bit_idx);
|
||||
mi_assert_internal(blocks_start == p); // not a pointer in our area?
|
||||
@ -442,23 +442,21 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
|
||||
-----------------------------------------------------------------------------*/
|
||||
void _mi_mem_collect(mi_os_tld_t* tld) {
|
||||
// free every region that has no segments in use.
|
||||
uintptr_t rcount = mi_atomic_read_relaxed(®ions_count);
|
||||
uintptr_t rcount = mi_atomic_load_relaxed(®ions_count);
|
||||
for (size_t i = 0; i < rcount; i++) {
|
||||
mem_region_t* region = ®ions[i];
|
||||
if (mi_atomic_read_relaxed(®ion->info) != 0) {
|
||||
if (mi_atomic_load_relaxed(®ion->info) != 0) {
|
||||
// if no segments used, try to claim the whole region
|
||||
uintptr_t m;
|
||||
do {
|
||||
m = mi_atomic_read_relaxed(®ion->in_use);
|
||||
} while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 ));
|
||||
uintptr_t m = mi_atomic_load_relaxed(®ion->in_use);
|
||||
while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
|
||||
if (m == 0) {
|
||||
// on success, free the whole region
|
||||
uint8_t* start = mi_atomic_read_ptr(uint8_t,®ions[i].start);
|
||||
size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid);
|
||||
uintptr_t commit = mi_atomic_read_relaxed(®ions[i].commit);
|
||||
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start);
|
||||
size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid);
|
||||
uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit);
|
||||
memset(®ions[i], 0, sizeof(mem_region_t));
|
||||
// and release the whole region
|
||||
mi_atomic_write(®ion->info, 0);
|
||||
mi_atomic_store_release(®ion->info, 0);
|
||||
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
|
||||
_mi_abandoned_await_readers(); // ensure no pending reads
|
||||
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
|
||||
|
@ -685,6 +685,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
|
||||
}
|
||||
|
||||
// zero the segment info? -- not always needed as it is zero initialized from the OS
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan
|
||||
if (!is_zero) {
|
||||
ptrdiff_t ofs = offsetof(mi_segment_t, next);
|
||||
size_t prefix = offsetof(mi_segment_t, slices) - ofs;
|
||||
@ -891,77 +892,75 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
|
||||
// This is a list of visited abandoned pages that were full at the time.
|
||||
// this list migrates to `abandoned` when that becomes NULL. The use of
|
||||
// this list reduces contention and the rate at which segments are visited.
|
||||
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
|
||||
static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL
|
||||
|
||||
// The abandoned page list (tagged as it supports pop)
|
||||
static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
|
||||
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
|
||||
|
||||
// We also maintain a count of current readers of the abandoned list
|
||||
// in order to prevent resetting/decommitting segment memory if it might
|
||||
// still be read.
|
||||
static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
|
||||
static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = 0
|
||||
|
||||
// Push on the visited list
|
||||
static void mi_abandoned_visited_push(mi_segment_t* segment) {
|
||||
mi_assert_internal(segment->thread_id == 0);
|
||||
mi_assert_internal(segment->abandoned_next == NULL);
|
||||
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL);
|
||||
mi_assert_internal(segment->next == NULL);
|
||||
mi_assert_internal(segment->used > 0);
|
||||
mi_segment_t* anext;
|
||||
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited);
|
||||
do {
|
||||
anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
|
||||
segment->abandoned_next = anext;
|
||||
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext));
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext);
|
||||
} while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment));
|
||||
}
|
||||
|
||||
// Move the visited list to the abandoned list.
|
||||
static bool mi_abandoned_visited_revisit(void)
|
||||
{
|
||||
// quick check if the visited list is empty
|
||||
if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
|
||||
if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false;
|
||||
|
||||
// grab the whole visited list
|
||||
mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL);
|
||||
mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL);
|
||||
if (first == NULL) return false;
|
||||
|
||||
// first try to swap directly if the abandoned list happens to be NULL
|
||||
const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
|
||||
mi_tagged_segment_t afirst;
|
||||
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
|
||||
if (mi_tagged_segment_ptr(ts)==NULL) {
|
||||
afirst = mi_tagged_segment(first, ts);
|
||||
if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true;
|
||||
if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) return true;
|
||||
}
|
||||
|
||||
// find the last element of the visited list: O(n)
|
||||
mi_segment_t* last = first;
|
||||
while (last->abandoned_next != NULL) {
|
||||
last = last->abandoned_next;
|
||||
mi_segment_t* next;
|
||||
while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) {
|
||||
last = next;
|
||||
}
|
||||
|
||||
// and atomically prepend to the abandoned list
|
||||
// (no need to increase the readers as we don't access the abandoned segments)
|
||||
mi_tagged_segment_t anext;
|
||||
mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned);
|
||||
do {
|
||||
anext = mi_atomic_read_relaxed(&abandoned);
|
||||
last->abandoned_next = mi_tagged_segment_ptr(anext);
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext));
|
||||
afirst = mi_tagged_segment(first, anext);
|
||||
} while (!mi_atomic_cas_weak(&abandoned, afirst, anext));
|
||||
} while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Push on the abandoned list.
|
||||
static void mi_abandoned_push(mi_segment_t* segment) {
|
||||
mi_assert_internal(segment->thread_id == 0);
|
||||
mi_assert_internal(segment->abandoned_next == NULL);
|
||||
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
|
||||
mi_assert_internal(segment->next == NULL);
|
||||
mi_assert_internal(segment->used > 0);
|
||||
mi_tagged_segment_t ts;
|
||||
mi_tagged_segment_t next;
|
||||
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
|
||||
do {
|
||||
ts = mi_atomic_read_relaxed(&abandoned);
|
||||
segment->abandoned_next = mi_tagged_segment_ptr(ts);
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts));
|
||||
next = mi_tagged_segment(segment, ts);
|
||||
} while (!mi_atomic_cas_weak(&abandoned, next, ts));
|
||||
} while (!mi_atomic_cas_weak_release(&abandoned, &ts, next));
|
||||
}
|
||||
|
||||
// Wait until there are no more pending reads on segments that used to be in the abandoned list
|
||||
@ -969,7 +968,7 @@ static void mi_abandoned_push(mi_segment_t* segment) {
|
||||
void _mi_abandoned_await_readers(void) {
|
||||
uintptr_t n;
|
||||
do {
|
||||
n = mi_atomic_read(&abandoned_readers);
|
||||
n = mi_atomic_load_acquire(&abandoned_readers);
|
||||
if (n != 0) mi_atomic_yield();
|
||||
} while (n != 0);
|
||||
}
|
||||
@ -978,7 +977,7 @@ void _mi_abandoned_await_readers(void) {
|
||||
static mi_segment_t* mi_abandoned_pop(void) {
|
||||
mi_segment_t* segment;
|
||||
// Check efficiently if it is empty (or if the visited list needs to be moved)
|
||||
mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
|
||||
mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned);
|
||||
segment = mi_tagged_segment_ptr(ts);
|
||||
if (mi_likely(segment == NULL)) {
|
||||
if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
|
||||
@ -988,19 +987,21 @@ static mi_segment_t* mi_abandoned_pop(void) {
|
||||
|
||||
// Do a pop. We use a reader count to prevent
|
||||
// a segment to be decommitted while a read is still pending,
|
||||
// and a tagged pointer to prevent A-B-A link corruption.
|
||||
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
|
||||
// and a tagged pointer to prevent A-B-A link corruption.
|
||||
// (this is called from `region.c:_mi_mem_free` for example)
|
||||
mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted
|
||||
mi_tagged_segment_t next = 0;
|
||||
ts = mi_atomic_load_acquire(&abandoned);
|
||||
do {
|
||||
ts = mi_atomic_read(&abandoned);
|
||||
segment = mi_tagged_segment_ptr(ts);
|
||||
if (segment != NULL) {
|
||||
next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
|
||||
mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next);
|
||||
next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
|
||||
}
|
||||
} while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts));
|
||||
mi_atomic_decrement(&abandoned_readers); // release reader lock
|
||||
} while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next));
|
||||
mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock
|
||||
if (segment != NULL) {
|
||||
segment->abandoned_next = NULL;
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
|
||||
}
|
||||
return segment;
|
||||
}
|
||||
@ -1012,7 +1013,7 @@ static mi_segment_t* mi_abandoned_pop(void) {
|
||||
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
|
||||
mi_assert_internal(segment->used == segment->abandoned);
|
||||
mi_assert_internal(segment->used > 0);
|
||||
mi_assert_internal(segment->abandoned_next == NULL);
|
||||
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
|
||||
mi_assert_internal(segment->abandoned_visits == 0);
|
||||
mi_assert_expensive(mi_segment_is_valid(segment,tld));
|
||||
|
||||
@ -1036,7 +1037,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
|
||||
_mi_stat_increase(&tld->stats->segments_abandoned, 1);
|
||||
mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
|
||||
segment->thread_id = 0;
|
||||
segment->abandoned_next = NULL;
|
||||
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL);
|
||||
segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
|
||||
mi_abandoned_push(segment);
|
||||
}
|
||||
@ -1118,7 +1119,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
|
||||
// Reclaim an abandoned segment; returns NULL if the segment was freed
|
||||
// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
|
||||
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
|
||||
mi_assert_internal(segment->abandoned_next == NULL);
|
||||
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL);
|
||||
mi_assert_expensive(mi_segment_is_valid(segment, tld));
|
||||
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
|
||||
|
||||
@ -1306,12 +1307,13 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
|
||||
// huge page segments are always abandoned and can be freed immediately by any thread
|
||||
mi_assert_internal(segment->kind==MI_SEGMENT_HUGE);
|
||||
mi_assert_internal(segment == _mi_page_segment(page));
|
||||
mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
|
||||
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0);
|
||||
|
||||
// claim it and free
|
||||
mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized.
|
||||
// paranoia: if this it the last reference, the cas should always succeed
|
||||
if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) {
|
||||
uintptr_t expected_tid = 0;
|
||||
if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) {
|
||||
mi_block_set_next(page, block, page->free);
|
||||
page->free = block;
|
||||
page->used--;
|
||||
@ -1328,6 +1330,11 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
|
||||
// mi_segments_track_size((long)segment->segment_size, tld);
|
||||
_mi_segment_page_free(page, true, &tld->segments);
|
||||
}
|
||||
#if (MI_DEBUG!=0)
|
||||
else {
|
||||
mi_assert_internal(false);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
@ -1371,7 +1378,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment
|
||||
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
|
||||
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
|
||||
|
||||
static volatile _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments
|
||||
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TB with 64MiB segments
|
||||
|
||||
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
|
||||
mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE?
|
||||
@ -1385,12 +1392,11 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment) {
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
|
||||
if (index==0) return;
|
||||
uintptr_t mask;
|
||||
uintptr_t mask = mi_segment_map[index];
|
||||
uintptr_t newmask;
|
||||
do {
|
||||
mask = mi_segment_map[index];
|
||||
newmask = (mask | ((uintptr_t)1 << bitidx));
|
||||
} while (!mi_atomic_cas_weak(&mi_segment_map[index], newmask, mask));
|
||||
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
|
||||
}
|
||||
|
||||
static void mi_segment_map_freed_at(const mi_segment_t* segment) {
|
||||
@ -1398,12 +1404,11 @@ static void mi_segment_map_freed_at(const mi_segment_t* segment) {
|
||||
size_t index = mi_segment_map_index_of(segment, &bitidx);
|
||||
mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
|
||||
if (index == 0) return;
|
||||
uintptr_t mask;
|
||||
uintptr_t mask = mi_segment_map[index];
|
||||
uintptr_t newmask;
|
||||
do {
|
||||
mask = mi_segment_map[index];
|
||||
do {
|
||||
newmask = (mask & ~((uintptr_t)1 << bitidx));
|
||||
} while (!mi_atomic_cas_weak(&mi_segment_map[index], newmask, mask));
|
||||
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
|
||||
}
|
||||
|
||||
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
|
||||
|
28
src/stats.c
28
src/stats.c
@ -26,13 +26,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
|
||||
if (mi_is_in_main(stat))
|
||||
{
|
||||
// add atomically (for abandoned pages)
|
||||
mi_atomic_addi64(&stat->current,amount);
|
||||
mi_atomic_maxi64(&stat->peak, mi_atomic_readi64(&stat->current));
|
||||
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
|
||||
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
|
||||
if (amount > 0) {
|
||||
mi_atomic_addi64(&stat->allocated,amount);
|
||||
mi_atomic_addi64_relaxed(&stat->allocated,amount);
|
||||
}
|
||||
else {
|
||||
mi_atomic_addi64(&stat->freed, -amount);
|
||||
mi_atomic_addi64_relaxed(&stat->freed, -amount);
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -50,8 +50,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
|
||||
|
||||
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
|
||||
if (mi_is_in_main(stat)) {
|
||||
mi_atomic_addi64( &stat->count, 1 );
|
||||
mi_atomic_addi64( &stat->total, (int64_t)amount );
|
||||
mi_atomic_addi64_relaxed( &stat->count, 1 );
|
||||
mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
|
||||
}
|
||||
else {
|
||||
stat->count++;
|
||||
@ -71,17 +71,17 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
|
||||
static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
|
||||
if (stat==src) return;
|
||||
if (src->allocated==0 && src->freed==0) return;
|
||||
mi_atomic_addi64( &stat->allocated, src->allocated * unit);
|
||||
mi_atomic_addi64( &stat->current, src->current * unit);
|
||||
mi_atomic_addi64( &stat->freed, src->freed * unit);
|
||||
mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit);
|
||||
mi_atomic_addi64_relaxed( &stat->current, src->current * unit);
|
||||
mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit);
|
||||
// peak scores do not work across threads..
|
||||
mi_atomic_addi64( &stat->peak, src->peak * unit);
|
||||
mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit);
|
||||
}
|
||||
|
||||
static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) {
|
||||
if (stat==src) return;
|
||||
mi_atomic_addi64( &stat->total, src->total * unit);
|
||||
mi_atomic_addi64( &stat->count, src->count * unit);
|
||||
mi_atomic_addi64_relaxed( &stat->total, src->total * unit);
|
||||
mi_atomic_addi64_relaxed( &stat->count, src->count * unit);
|
||||
}
|
||||
|
||||
// must be thread safe as it is called from stats_merge
|
||||
@ -384,7 +384,7 @@ void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
|
||||
// Basic timer for convenience; use milli-seconds to avoid doubles
|
||||
// ----------------------------------------------------------------
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
|
||||
static LARGE_INTEGER mfreq; // = 0
|
||||
if (mfreq.QuadPart == 0LL) {
|
||||
@ -439,7 +439,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
|
||||
// --------------------------------------------------------
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
#include <psapi.h>
|
||||
#pragma comment(lib,"psapi.lib")
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
static void msleep(unsigned long msecs) { Sleep(msecs); }
|
||||
#else
|
||||
#include <unistd.h>
|
||||
|
@ -189,7 +189,7 @@ static void test_stress(void) {
|
||||
}
|
||||
}
|
||||
// mi_collect(false);
|
||||
#ifndef NDEBUG
|
||||
#if !defined(NDEBUG) || defined(MI_TSAN)
|
||||
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
|
||||
#endif
|
||||
}
|
||||
@ -260,7 +260,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <windows.h>
|
||||
#include <Windows.h>
|
||||
|
||||
static DWORD WINAPI thread_entry(LPVOID param) {
|
||||
thread_entry_fun((intptr_t)param);
|
||||
|
Loading…
Reference in New Issue
Block a user