Merge branch 'dev-exp-tls' into dev-exp

This commit is contained in:
daan 2020-02-09 18:34:23 -08:00
commit 609703a7f3
11 changed files with 338 additions and 184 deletions

View File

@ -5,11 +5,12 @@ set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
option(MI_OVERRIDE "Override the standard malloc interface" ON)
option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF)
option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" OFF) # enables interpose as well
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_TESTS "Build test executables" ON)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
@ -61,14 +62,19 @@ endif()
if(MI_OVERRIDE MATCHES "ON")
message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
if(APPLE)
if(MI_OSX_ZONE MATCHES "ON")
# use zone's on macOS
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
list(APPEND mi_sources src/alloc-override-osx.c)
if(NOT MI_INTERPOSE MATCHES "ON")
message(STATUS " (enabling INTERPOSE as well since zone's require this)")
set(MI_INTERPOSE "ON")
endif()
endif()
if(MI_INTERPOSE MATCHES "ON")
# use interpose on macOS
message(STATUS " Use interpose to override malloc (MI_INTERPOSE=ON)")
list(APPEND mi_defines MI_INTERPOSE)
else()
# use zone's on macOS
message(STATUS " Use zone's to override malloc (MI_INTERPOSE=OFF)")
list(APPEND mi_sources src/alloc-override-osx.c)
endif()
endif()
endif()
@ -247,7 +253,7 @@ if (MI_BUILD_TESTS MATCHES "ON")
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-stress PRIVATE include)
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries})
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc ${mi_libraries})
enable_testing()
add_test(test_api, mimalloc-test-api)

View File

@ -10,10 +10,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc-types.h"
#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
#define MI_TLS_RECURSE_GUARD
#endif
#if (MI_DEBUG>0)
#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
#else
@ -33,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#define mi_decl_cache_align
#endif
@ -51,6 +47,7 @@ void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
uintptr_t _os_random_weak(uintptr_t extra_seed);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c
@ -237,7 +234,7 @@ static inline size_t _mi_wsize_from_size(size_t size) {
// Overflow detecting multiply
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#include <limits.h> // UINT_MAX, ULONG_MAX
#if (SIZE_MAX == UINT_MAX)
@ -270,26 +267,76 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
}
/* -----------------------------------------------------------
The thread local default heap
----------------------------------------------------------- */
/* ----------------------------------------------------------------------------------------
The thread local default heap: `_mi_get_default_heap` returns the thread local heap.
On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
that the storage will always be available (allocated on the thread stacks).
On some platforms though we cannot use that when overriding `malloc` since the underlying
TLS implementation (or the loader) will itself call `malloc` on first access and recurse.
We try to circumvent this in an efficient way:
- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
loader itself calls `malloc` even before the modules are initialized.
- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
- DragonFly: not yet working.
------------------------------------------------------------------------------------------- */
extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap
extern mi_heap_t _mi_heap_main; // statically allocated main backing heap
extern bool _mi_process_is_initialized;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__MACH__) // OSX
#define MI_TLS_SLOT 89 // seems unused?
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
#elif defined(__DragonFly__)
#warning "mimalloc is not working correctly on DragonFly yet."
#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
#endif
#endif
#if defined(MI_TLS_SLOT)
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
#include <pthread.h>
// Return the address of the default-heap slot for the current thread.
// The slot lives `MI_TLS_PTHREAD_SLOT_OFS` bytes into the block that
// `pthread_self()` points to. On DragonFly, `pthread_self()` may yield NULL;
// in that case a function-local static slot holding the main heap is
// returned instead, since there is no thread block to index into.
static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
  pthread_t self = pthread_self();
  #if defined(__DragonFly__)
  if (self==NULL) {
    // C requires a constant initializer for objects with static storage
    // duration, so the main heap pointer is filled in lazily on first use.
    static mi_heap_t* pheap_main = NULL;
    if (pheap_main == NULL) {
      pheap_main = _mi_heap_main_get();
    }
    return &pheap_main;
  }
  #endif
  return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
}
#elif defined(MI_TLS_PTHREAD)
#include <pthread.h>
extern pthread_key_t _mi_heap_default_key;
#else
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
#endif
static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef MI_TLS_RECURSE_GUARD
// on some BSD platforms, like macOS, the dynamic loader calls `malloc`
// to initialize thread local data. To avoid recursion, we need to avoid
// accessing the thread local `_mi_default_heap` until our module is loaded
// and use the statically allocated main heap until that time.
// TODO: patch ourselves dynamically to avoid this check every time?
if (!_mi_process_is_initialized) return &_mi_heap_main;
#endif
#if defined(MI_TLS_SLOT)
mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
mi_heap_t* heap = *mi_tls_pthread_heap_slot();
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#elif defined(MI_TLS_PTHREAD)
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
#if defined(MI_TLS_RECURSE_GUARD)
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
#endif
return _mi_heap_default;
#endif
}
static inline bool mi_heap_is_default(const mi_heap_t* heap) {
@ -306,6 +353,8 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
}
static inline uintptr_t _mi_ptr_cookie(const void* p) {
  // the cookie of the statically allocated main heap is xor-ed into the pointer value
  extern mi_heap_t _mi_heap_main;
  const uintptr_t main_cookie = _mi_heap_main.cookie;
  mi_assert_internal(main_cookie != 0);
  const uintptr_t address = (uintptr_t)p;
  return (address ^ main_cookie);
}
@ -351,7 +400,7 @@ static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, con
// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
uintptr_t idx = _mi_segment_page_idx_of(segment, p);
uintptr_t idx = _mi_segment_page_idx_of(segment, p);
return &((mi_segment_t*)segment)->pages[idx];
}
@ -424,14 +473,14 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t*
return mi_tf_make(block, mi_tf_delayed(tf));
}
// are all blocks in a page freed?
// are all blocks in a page freed?
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
static inline bool mi_page_all_free(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  // a page is entirely free exactly when its `used` count is zero
  const bool none_in_use = (page->used == 0);
  return none_in_use;
}
// are there any available blocks?
// are there any available blocks?
static inline bool mi_page_has_any_available(const mi_page_t* page) {
mi_assert_internal(page != NULL && page->reserved > 0);
return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
@ -479,11 +528,11 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
/* -------------------------------------------------------------------
Encoding/Decoding the free list next pointers
This is to protect against buffer overflow exploits where the
free list is mutated. Many hardened allocators xor the next pointer `p`
This is to protect against buffer overflow exploits where the
free list is mutated. Many hardened allocators xor the next pointer `p`
with a secret key `k1`, as `p^k1`. This prevents overwriting with known
values but might be still too weak: if the attacker can guess
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
values but might be still too weak: if the attacker can guess
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
Moreover, if multiple blocks can be read as well, the attacker can
xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
about the pointers (and subsequently `k1`).
@ -491,9 +540,9 @@ about the pointers (and subsequently `k1`).
Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
Since these operations are not associative, the above approaches do not
work so well any more even if the `p` can be guesstimated. For example,
for the read case we can subtract two entries to discard the `+k1` term,
for the read case we can subtract two entries to discard the `+k1` term,
but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
We include the left-rotation since xor and addition are otherwise linear
We include the left-rotation since xor and addition are otherwise linear
in the lowest bit. Finally, both keys are unique per page which reduces
the re-use of keys by a large factor.
@ -619,9 +668,8 @@ static inline size_t _mi_os_numa_node_count(void) {
// -------------------------------------------------------------------
// Getting the thread id should be performant
// as it is called in the fast path of `_mi_free`,
// so we specialize for various platforms.
// Getting the thread id should be performant as it is called in the
// fast path of `_mi_free` and we specialize for various platforms.
// -------------------------------------------------------------------
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
@ -630,24 +678,55 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
#elif defined(__GNUC__) && \
(defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
// TLS register on x86 is in the FS or GS register
// see: https://akkadia.org/drepper/tls.pdf
// TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf
// Read the pointer-sized thread-local entry with index `slot`.
// On x86 the entry is loaded from byte offset `slot*sizeof(void*)` relative
// to the GS or FS segment; on ARM the thread register is read first and then
// indexed as an array of pointers.
// Returns the raw pointer value stored in that slot.
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
void* res;
// byte offset of the slot; only used by the x86 variants below
const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // 32-bit always uses GS
#elif defined(__MACH__) && defined(__x86_64__)
__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
#elif defined(__x86_64__)
__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
void** tcb; UNUSED(ofs);
// read the thread register through coprocessor p15 and clear its two lowest bits (`bic ... #3`)
asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
res = tcb[slot];
#elif defined(__aarch64__)
void** tcb; UNUSED(ofs);
// read the thread pointer from `tpidr_el0`
asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
res = tcb[slot];
#endif
return res;
}
// setting is only used on macOSX for now
// Store `value` into the pointer-sized thread-local entry with index `slot`.
// This is the counterpart of `mi_tls_slot`: on x86 the value is written at
// byte offset `slot*sizeof(void*)` relative to the GS or FS segment; on ARM
// the thread register is read first and then indexed as an array of pointers.
static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
  // byte offset of the slot; only used by the x86 variants below
  const size_t ofs = (slot*sizeof(void*));
  #if defined(__i386__)
  __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
  #elif defined(__MACH__) && defined(__x86_64__)
  __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOSX uses GS
  #elif defined(__x86_64__)
  // operand %0 is the memory destination and %1 the value to store
  __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
  #elif defined(__arm__)
  void** tcb; UNUSED(ofs);
  // read the thread register through coprocessor p15 and clear its two lowest bits (`bic ... #3`)
  asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
  tcb[slot] = value;
  #elif defined(__aarch64__)
  void** tcb; UNUSED(ofs);
  // read the thread pointer from `tpidr_el0`
  asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
  tcb[slot] = value;
  #endif
}
static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept {
uintptr_t tid;
#if defined(__i386__)
__asm__("movl %%gs:0, %0" : "=r" (tid) : : ); // 32-bit always uses GS
#elif defined(__MACH__)
__asm__("movq %%gs:0, %0" : "=r" (tid) : : ); // x86_64 macOS uses GS
#elif defined(__x86_64__)
__asm__("movq %%fs:0, %0" : "=r" (tid) : : ); // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
asm volatile ("mrc p15, 0, %0, c13, c0, 3" : "=r" (tid));
#elif defined(__aarch64__)
asm volatile ("mrs %0, tpidr_el0" : "=r" (tid));
#endif
return tid;
// in all our targets, slot 0 is the pointer to the thread control block
return (uintptr_t)mi_tls_slot(0);
}
#else
// otherwise use standard C

View File

@ -32,8 +32,8 @@ terms of the MIT license. A copy of the license can be found in the file
void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
void operator delete (void* p, std::size_t n) { mi_free_size(p,n); };
void operator delete[](void* p, std::size_t n) { mi_free_size(p,n); };
void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); };
void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); };
#endif
#if (__cplusplus > 201402L || defined(__cpp_aligned_new))

View File

@ -17,6 +17,12 @@ terms of the MIT license. A copy of the license can be found in the file
/* ------------------------------------------------------
Override system malloc on macOS
This is done through the malloc zone interface.
It seems we also need to interpose (see `alloc-override.c`)
or otherwise we get zone errors as there are usually
already allocations done by the time we take over the
zone. Unfortunately, that means we need to replace
the `free` with a checked free (`cfree`) impacting
performance.
------------------------------------------------------ */
#include <AvailabilityMacros.h>
@ -35,34 +41,42 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
------------------------------------------------------ */
// Zone `size` callback: always reports 0 for any pointer.
static size_t zone_size(malloc_zone_t* zone, const void* p) {
  UNUSED(zone);
  UNUSED(p);
  // as we cannot guarantee that `p` comes from us, the size is reported as unknown (0)
  const size_t unknown_size = 0;
  return unknown_size;
}
// Zone `malloc` callback: the zone argument is ignored and the request goes to `mi_malloc`.
static void* zone_malloc(malloc_zone_t* zone, size_t size) {
  UNUSED(zone);
  void* const block = mi_malloc(size);
  return block;
}
// Zone `calloc` callback: the zone argument is ignored and the request goes to `mi_calloc`.
static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) {
  UNUSED(zone);
  void* const block = mi_calloc(count, size);
  return block;
}
// Zone `valloc` callback: allocates `size` bytes aligned to the OS page size.
static void* zone_valloc(malloc_zone_t* zone, size_t size) {
  UNUSED(zone);
  const size_t page_alignment = _mi_os_page_size();
  return mi_malloc_aligned(size, page_alignment);
}
// Zone `free` callback: the zone argument is ignored and `p` is handed to `mi_free`.
static void zone_free(malloc_zone_t* zone, void* p) {
  UNUSED(zone);
  // a `void` function must not `return` an expression in ISO C, so just call
  mi_free(p);
}
// Zone `realloc` callback: the zone argument is ignored and the request goes to `mi_realloc`.
static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) {
  UNUSED(zone);
  void* const resized = mi_realloc(p, newsize);
  return resized;
}
// Zone `memalign` callback: note that `mi_malloc_aligned` takes the size first, then the alignment.
static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) {
  UNUSED(zone);
  void* const block = mi_malloc_aligned(size, alignment);
  return block;
}
// Zone `destroy` callback: currently does nothing.
static void zone_destroy(malloc_zone_t* zone) {
  // todo: ignore for now?
  UNUSED(zone);
}
@ -83,11 +97,13 @@ static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) {
}
// Zone `pressure_relief` callback: runs a non-forced `mi_collect` and always reports 0.
static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) {
  UNUSED(zone);
  UNUSED(size);
  const size_t reported = 0;
  mi_collect(false);
  return reported;
}
// Zone `free_definite_size` callback: the size is ignored and the pointer is freed through `zone_free`.
static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
  UNUSED(size);
  zone_free(zone, p);
}
@ -102,34 +118,43 @@ static kern_return_t intro_enumerator(task_t task, void* p,
vm_range_recorder_t recorder)
{
// todo: enumerate all memory
UNUSED(task); UNUSED(p); UNUSED(type_mask); UNUSED(zone_address);
UNUSED(reader); UNUSED(recorder);
return KERN_SUCCESS;
}
// Introspection `good_size` callback: forwards to `mi_good_size`.
static size_t intro_good_size(malloc_zone_t* zone, size_t size) {
  UNUSED(zone);
  const size_t good = mi_good_size(size);
  return good;
}
// Introspection `check` callback: always reports `true`; no checking is performed here.
static boolean_t intro_check(malloc_zone_t* zone) {
  UNUSED(zone);
  const boolean_t zone_ok = true;
  return zone_ok;
}
// Introspection `print` callback: prints the mimalloc statistics; `verbose` is ignored.
static void intro_print(malloc_zone_t* zone, boolean_t verbose) {
  UNUSED(zone);
  UNUSED(verbose);
  mi_stats_print(NULL);
}
// Introspection `log` callback: currently does nothing.
static void intro_log(malloc_zone_t* zone, void* p) {
  // todo?
  UNUSED(zone);
  UNUSED(p);
}
// Introspection `force_lock` callback: currently does nothing.
static void intro_force_lock(malloc_zone_t* zone) {
  // todo?
  UNUSED(zone);
}
// Introspection `force_unlock` callback: currently does nothing.
static void intro_force_unlock(malloc_zone_t* zone) {
  // todo?
  UNUSED(zone);
}
static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
UNUSED(zone);
// todo...
stats->blocks_in_use = 0;
stats->size_in_use = 0;
@ -138,6 +163,7 @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) {
}
// Introspection `zone_locked` callback: always reports `false`.
static boolean_t intro_zone_locked(malloc_zone_t* zone) {
  UNUSED(zone);
  const boolean_t is_locked = false;
  return is_locked;
}
@ -161,7 +187,6 @@ static malloc_zone_t* mi_get_default_zone()
}
}
static void __attribute__((constructor)) _mi_macos_override_malloc()
{
static malloc_introspection_t intro;
@ -201,6 +226,7 @@ static void __attribute__((constructor)) _mi_macos_override_malloc()
zone.free_definite_size = &zone_free_definite_size;
zone.pressure_relief = &zone_pressure_relief;
intro.zone_locked = &intro_zone_locked;
intro.statistics = &intro_statistics;
// force the purgeable zone to exist to avoid strange bugs
if (malloc_default_purgeable_zone) {
@ -225,6 +251,7 @@ static void __attribute__((constructor)) _mi_macos_override_malloc()
malloc_zone_unregister(purgeable_zone);
malloc_zone_register(purgeable_zone);
}
}
#endif // MI_MALLOC_OVERRIDE

View File

@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && !defined(_WIN32)
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__MACH__) && !defined(MI_INTERPOSE)))
// ------------------------------------------------------
// Override system malloc
@ -47,26 +47,31 @@ terms of the MIT license. A copy of the license can be found in the file
const void* replacement;
const void* target;
};
#define MI_INTERPOSEX(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSEX(fun,mi_##fun)
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc),
MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(free),
MI_INTERPOSE_MI(strdup),
MI_INTERPOSE_MI(strndup)
MI_INTERPOSE_MI(strndup),
MI_INTERPOSE_MI(realpath),
MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf),
MI_INTERPOSE_MI(valloc),
// some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
};
#elif defined(_MSC_VER)
// cannot override malloc unless using a dll.
// we just override new/delete which does work in a static library.
#else
// On all other systems forward to our API
void* malloc(size_t size) mi_attr_noexcept MI_FORWARD1(mi_malloc, size);
void* calloc(size_t size, size_t n) mi_attr_noexcept MI_FORWARD2(mi_calloc, size, n);
void* realloc(void* p, size_t newsize) mi_attr_noexcept MI_FORWARD2(mi_realloc, p, newsize);
void free(void* p) mi_attr_noexcept MI_FORWARD0(mi_free, p);
void* malloc(size_t size) MI_FORWARD1(mi_malloc, size);
void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n);
void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize);
void free(void* p) MI_FORWARD0(mi_free, p);
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
@ -94,8 +99,8 @@ terms of the MIT license. A copy of the license can be found in the file
void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); }
#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
void operator delete (void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n);
void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n);
void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n);
void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n);
#endif
#if (__cplusplus > 201402L || defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5))
@ -194,4 +199,3 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me
#endif
#endif // MI_MALLOC_OVERRIDE && !_WIN32

View File

@ -212,7 +212,7 @@ static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* bl
size_t delta;
bool ok = mi_page_decode_padding(page, block, &delta, &bsize);
mi_assert_internal(ok); mi_assert_internal(delta <= bsize);
return (ok ? bsize - delta : 0);
return (ok ? bsize - delta : 0);
}
static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) {
@ -259,7 +259,7 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co
mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize);
padding->delta = (uint32_t)new_delta;
}
#else
#else
static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
UNUSED(page);
UNUSED(block);
@ -359,7 +359,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
}
else if (mi_unlikely(mi_page_is_in_full(page))) {
_mi_page_unfull(page);
}
}
}
else {
_mi_free_block_mt(page,block);
@ -401,7 +401,7 @@ void mi_free(void* p) mi_attr_noexcept
"(this may still be a valid very large allocation (over 64MiB))\n", p);
if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) {
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
}
}
}
#endif
#if (MI_DEBUG!=0 || MI_SECURE>=4)
@ -421,11 +421,11 @@ void mi_free(void* p) mi_attr_noexcept
mi_heap_stat_decrease(heap, malloc, bsize);
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { // huge page stats are accounted for in `_mi_page_retire`
mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], 1);
}
}
#endif
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
// local, and not full or aligned
if (mi_unlikely(mi_check_is_double_free(page,block))) return;
mi_check_padding(page, block);
#if (MI_DEBUG!=0)
@ -436,7 +436,7 @@ void mi_free(void* p) mi_attr_noexcept
page->used--;
if (mi_unlikely(mi_page_all_free(page))) {
_mi_page_retire(page);
}
}
}
else {
// non-local, aligned blocks, or a full page; use the more generic path
@ -473,7 +473,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
const mi_segment_t* const segment = _mi_ptr_segment(p);
const mi_page_t* const page = _mi_segment_page_of(segment, p);
const mi_block_t* const block = (const mi_block_t*)p;
const size_t size = mi_page_usable_size_of(page, block);
const size_t size = mi_page_usable_size_of(page, block);
if (mi_unlikely(mi_page_has_aligned(page))) {
ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);

View File

@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = {
#if defined(MI_PADDING) && (MI_INTPTR_SIZE >= 8)
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#elif defined(MI_PADDING)
#elif defined(MI_PADDING)
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() }
#else
#define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() }
@ -107,32 +107,28 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
#define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats)))
#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os)))
extern mi_heap_t _mi_heap_main;
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, 0, NULL,
tld_main_stats, tld_main_os
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, 0, NULL,
tld_main_stats, tld_main_os
}, // segments
{ 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
#if MI_INTPTR_SIZE==8
#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL)
#else
#define MI_INIT_COOKIE (0xCDCDCDCDUL)
#endif
mi_heap_t _mi_heap_main = {
&tld_main,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL),
0, // thread id
MI_INIT_COOKIE, // initial cookie
{ MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0}, {0}, 0 }, // random
0, // initial cookie
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0 }, // random
0, // page count
false // can reclaim
};
@ -142,6 +138,22 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { MI_STATS_NULL };
static void mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = _os_random_weak((uintptr_t)&mi_heap_main_init);
_mi_random_init(&_mi_heap_main.random);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
}
}
// Return the statically allocated main heap, after running `mi_heap_main_init` on it.
mi_heap_t* _mi_heap_main_get(void) {
  mi_heap_t* const main_heap = &_mi_heap_main;
  mi_heap_main_init();
  return main_heap;
}
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
----------------------------------------------------------- */
@ -154,14 +166,16 @@ typedef struct mi_thread_data_s {
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(_mi_heap_default)) return true;
if (mi_heap_is_initialized(mi_get_default_heap())) return true;
if (_mi_is_main_thread()) {
// mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
// the main heap is statically allocated
mi_heap_main_init();
_mi_heap_set_default_direct(&_mi_heap_main);
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
//mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
if (td == NULL) {
_mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n");
@ -176,7 +190,7 @@ static bool _mi_heap_init(void) {
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->keys[0] = _mi_heap_random_next(heap);
heap->keys[1] = _mi_heap_random_next(heap);
heap->tld = tld;
heap->tld = tld;
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
@ -253,14 +267,15 @@ static void _mi_thread_done(mi_heap_t* default_heap);
// use thread local storage keys to detect thread ending
#include <windows.h>
#include <fibersapi.h>
static DWORD mi_fls_key;
static DWORD mi_fls_key = (DWORD)(-1);
// Callback registered via `FlsAlloc`: passes a non-NULL stored heap to `_mi_thread_done`.
static void NTAPI mi_fls_done(PVOID value) {
  if (value == NULL) return;  // nothing was stored in the slot
  mi_heap_t* const heap = (mi_heap_t*)value;
  _mi_thread_done(heap);
}
#elif defined(MI_USE_PTHREADS)
// use pthread locol storage keys to detect thread ending
// use pthread local storage keys to detect thread ending
// (and used with MI_TLS_PTHREADS for the default heap)
#include <pthread.h>
static pthread_key_t mi_pthread_key;
pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1);
static void mi_pthread_done(void* value) {
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
}
@ -280,8 +295,10 @@ static void mi_process_setup_auto_thread_done(void) {
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
mi_fls_key = FlsAlloc(&mi_fls_done);
#elif defined(MI_USE_PTHREADS)
pthread_key_create(&mi_pthread_key, &mi_pthread_done);
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
#endif
_mi_heap_set_default_direct(&_mi_heap_main);
}
@ -323,21 +340,31 @@ static void _mi_thread_done(mi_heap_t* heap) {
void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
#if defined(MI_TLS_SLOT)
mi_tls_slot_set(MI_TLS_SLOT,heap);
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
*mi_tls_pthread_heap_slot() = heap;
#elif defined(MI_TLS_PTHREAD)
// we use _mi_heap_default_key
#else
_mi_heap_default = heap;
#endif
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
mi_assert_internal(mi_fls_key != 0);
FlsSetValue(mi_fls_key, heap);
#elif defined(MI_USE_PTHREADS)
pthread_setspecific(mi_pthread_key, heap);
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
pthread_setspecific(_mi_heap_default_key, heap);
}
#endif
}
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
// --------------------------------------------------------
@ -389,11 +416,16 @@ static void mi_allocator_done() {
// Called once by the process loader
static void mi_process_load(void) {
mi_heap_main_init();
#if defined(MI_TLS_RECURSE_GUARD)
volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
UNUSED(dummy);
#endif
os_preloading = false;
atexit(&mi_process_done);
_mi_options_init();
mi_process_init();
//mi_stats_reset();
//mi_stats_reset();-
if (mi_redirected) _mi_verbose_message("malloc is redirected.\n");
// show message from the redirector (if present)
@ -408,22 +440,12 @@ static void mi_process_load(void) {
void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
if (_mi_process_is_initialized) return;
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
_mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
#endif
mi_process_setup_auto_thread_done();
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
_mi_os_init();
mi_heap_main_init();
#if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif

View File

@ -70,7 +70,11 @@ static mi_option_desc_t options[_mi_option_last] =
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
#if defined(__NetBSD__)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
#else
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
#endif
{ 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
@ -239,16 +243,30 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT
// inside the C runtime causes another message.
static mi_decl_thread bool recurse = false;
// Try to enter the non-reentrant output section of the current thread.
// Returns true when the caller may proceed (and must later call
// `mi_recurse_exit`), false when the thread is already inside the section.
// With MI_TLS_RECURSE_GUARD defined, the thread-local flag is not touched
// while `_mi_preloading()` is true and entry is always granted.
static bool mi_recurse_enter(void) {
  #ifdef MI_TLS_RECURSE_GUARD
  if (_mi_preloading()) return true;
  #endif
  if (!recurse) {
    // first entry on this thread: claim the section
    recurse = true;
    return true;
  }
  return false;
}
// Leave the non-reentrant output section by clearing the thread-local flag.
// With MI_TLS_RECURSE_GUARD defined, the flag is left untouched while
// `_mi_preloading()` is true.
static void mi_recurse_exit(void) {
  #ifdef MI_TLS_RECURSE_GUARD
  if (!_mi_preloading()) { recurse = false; }
  #else
  recurse = false;
  #endif
}
// Write `prefix` (when non-NULL) followed by `message` through the output
// function `out`, passing `arg` along to it.
// If `out` is NULL, `stdout` or `stderr`, the default output function (and its
// argument) obtained from `mi_out_get_default` is used instead.
// Nothing is written when `mi_recurse_enter` refuses entry, i.e. when this
// thread is already in the middle of emitting a message.
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
  // all access to the recursion flag goes through enter/exit so the
  // MI_TLS_RECURSE_GUARD handling applies here as well
  if (!mi_recurse_enter()) return;
  if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
    out = mi_out_get_default(&arg);
  }
  if (prefix != NULL) out(prefix,arg);
  out(message,arg);
  mi_recurse_exit();
}
// Define our own limited `fprintf` that avoids memory allocation.
@ -256,14 +274,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
// Format `fmt` with `args` into a fixed 512-byte stack buffer and pass the
// result, together with `prefix`, to `_mi_fputs`. Output that does not fit in
// the buffer is truncated by `vsnprintf`. Does nothing when `fmt` is NULL or
// when `mi_recurse_enter` refuses entry.
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
  char buf[512];
  if (fmt==NULL) return;
  // guard the formatting call; the guard is released again before `_mi_fputs`
  // because that function takes the guard itself
  if (!mi_recurse_enter()) return;
  vsnprintf(buf,sizeof(buf)-1,fmt,args);
  mi_recurse_exit();
  _mi_fputs(out,arg,prefix,buf);
}
void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
va_list args;
va_start(args,fmt);
@ -290,7 +306,7 @@ void _mi_verbose_message(const char* fmt, ...) {
// Print an error message with the "mimalloc: error: " prefix.
// The message is suppressed unless the `show_errors` or `verbose` option is
// enabled, and once the incremented error counter exceeds `mi_max_error_count`.
static void mi_show_error_message(const char* fmt, va_list args) {
  if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
  if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
  // `args` is consumed by this call, so it must be passed on exactly once
  mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args);
}
void _mi_warning_message(const char* fmt, ...) {

View File

@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
and to avoid implementations that use a lock. We only use the OS provided
random source to initialize the initial seeds. Since we do not need ultimate
performance but we do rely on the security (for secret cookies in secure mode)
we use a cryptographically secure generator (chacha20).
@ -21,11 +21,11 @@ we use a cryptographically secure generator (chacha20).
/* ----------------------------------------------------------------------------
Chacha20 implementation as the original algorithm with a 64-bit nonce
and counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position 0 to 3: constant key
Position 4 to 11: the key
Position 12 to 13: the counter.
Position 14 to 15: the nonce.
@ -44,8 +44,8 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
{
static void chacha_block(mi_random_ctx_t* ctx)
{
// scramble into `x`
uint32_t x[16];
for (size_t i = 0; i < 16; i++) {
@ -72,8 +72,8 @@ static void chacha_block(mi_random_ctx_t* ctx)
ctx->input[12] += 1;
if (ctx->input[12] == 0) {
ctx->input[13] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
}
}
}
@ -83,7 +83,7 @@ static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
chacha_block(ctx);
ctx->output_available = 16; // (assign again to suppress static analysis warning)
}
const uint32_t x = ctx->output[16 - ctx->output_available];
const uint32_t x = ctx->output[16 - ctx->output_available];
ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
ctx->output_available--;
return x;
@ -94,9 +94,9 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) {
return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
// since we only use chacha for randomness (and not encryption) we
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian but we do anyways
// just for being compatible :-)
memset(ctx, 0, sizeof(*ctx));
@ -110,7 +110,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
ctx->input[12] = 0;
ctx->input[13] = 0;
ctx->input[14] = (uint32_t)nonce;
ctx->input[15] = (uint32_t)(nonce >> 32);
ctx->input[15] = (uint32_t)(nonce >> 32);
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
@ -184,7 +184,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__linux__)
#elif defined(__linux__)
#include <sys/syscall.h>
#include <unistd.h>
#include <sys/types.h>
@ -241,8 +241,8 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#include <time.h>
#endif
static uintptr_t os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
uintptr_t _os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
@ -267,10 +267,10 @@ static uintptr_t os_random_weak(uintptr_t extra_seed) {
void _mi_random_init(mi_random_ctx_t* ctx) {
uint8_t key[32];
if (!os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0);
uintptr_t x = _os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
@ -280,7 +280,7 @@ void _mi_random_init(mi_random_ctx_t* ctx) {
}
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {

View File

@ -17,9 +17,9 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_
/* --------------------------------------------------------------------------------
Segment allocation
We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid
splitting VMA's on Linux and reduce fragmentation on other OS's.
Each thread owns its own segments.
Currently we have:
- small pages (64kb), 64 in one segment
@ -154,14 +154,14 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
for (size_t i = 0; i < segment->capacity; i++) {
const mi_page_t* const page = &segment->pages[i];
if (!page->segment_in_use) {
nfree++;
nfree++;
}
if (page->segment_in_use || page->is_reset) {
mi_assert_expensive(!mi_pages_reset_contains(page, tld));
}
}
mi_assert_internal(nfree + segment->used == segment->capacity);
mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
// mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
(mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
return true;
@ -287,7 +287,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen
mi_assert_expensive(!mi_pages_reset_contains(page, tld));
mi_assert_internal(_mi_page_segment(page)==segment);
if (!mi_option_is_enabled(mi_option_page_reset)) return;
if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return;
if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return;
if (mi_option_get(mi_option_reset_delay) == 0) {
// reset immediately?
@ -296,7 +296,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen
else {
// otherwise push on the delayed page reset queue
mi_page_queue_t* pq = &tld->pages_reset;
// push on top
// push on top
mi_page_reset_set_expire(page);
page->next = pq->first;
page->prev = NULL;
@ -317,7 +317,7 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) {
mi_page_queue_t* pq = &tld->pages_reset;
mi_assert_internal(pq!=NULL);
mi_assert_internal(!page->segment_in_use);
mi_assert_internal(mi_pages_reset_contains(page, tld));
mi_assert_internal(mi_pages_reset_contains(page, tld));
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == pq->last) pq->last = page->prev;
@ -333,19 +333,19 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for
if (!page->segment_in_use && page->is_committed && !page->is_reset) {
mi_pages_reset_remove(page, tld);
if (force_reset) {
mi_page_reset(segment, page, 0, tld);
mi_page_reset(segment, page, 0, tld);
}
}
else {
mi_assert_internal(mi_page_not_in_queue(page,tld));
}
}
}
}
static void mi_reset_delayed(mi_segments_tld_t* tld) {
if (!mi_option_is_enabled(mi_option_page_reset)) return;
mi_msecs_t now = _mi_clock_now();
mi_page_queue_t* pq = &tld->pages_reset;
mi_page_queue_t* pq = &tld->pages_reset;
// from oldest up to the first that has not expired yet
mi_page_t* page = pq->last;
while (page != NULL && mi_page_reset_is_expired(page,now)) {
@ -359,7 +359,7 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) {
pq->last = page;
if (page != NULL){
page->next = NULL;
}
}
else {
pq->first = NULL;
}
@ -540,7 +540,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
}
mi_assert_internal(tld->cache_count == 0);
mi_assert_internal(tld->cache == NULL);
#if MI_DEBUG>=2
#if MI_DEBUG>=2
if (!_mi_is_main_thread()) {
mi_assert_internal(tld->pages_reset.first == NULL);
mi_assert_internal(tld->pages_reset.last == NULL);
@ -684,7 +684,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
UNUSED(force);
mi_assert(segment != NULL);
mi_assert(segment != NULL);
// note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse)
bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset));
mi_pages_reset_remove_all_in_segment(segment, force_reset, tld);
@ -716,7 +716,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) {
static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(_mi_page_segment(page) == segment);
mi_assert_internal(!page->segment_in_use);
mi_assert_internal(!page->segment_in_use);
// set in-use before doing unreset to prevent delayed reset
mi_pages_reset_remove(page, tld);
page->segment_in_use = true;
@ -756,7 +756,7 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
// clear page data; can be called on abandoned segments
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld)
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld)
{
mi_assert_internal(page->segment_in_use);
mi_assert_internal(mi_page_all_free(page));
@ -791,7 +791,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a
segment->used--;
// add to the free page list for reuse/reset
if (allow_reset) {
if (allow_reset) {
mi_pages_reset_add(segment, page, tld);
}
@ -848,12 +848,12 @@ Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
----------------------------------------------------------- */
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
@ -869,7 +869,7 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
}
// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
@ -895,7 +895,7 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) {
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
@ -961,12 +961,12 @@ static mi_segment_t* mi_abandoned_pop(void) {
segment = mi_tagged_segment_ptr(ts);
if (mi_likely(segment == NULL)) {
if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
return NULL;
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment from being decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `memory.c:_mi_mem_free` for example)
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
@ -1031,7 +1031,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
----------------------------------------------------------- */
// Possibly clear pages and check if free space is available
static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free)
static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
bool has_page = false;
@ -1039,17 +1039,17 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
size_t pages_used_empty = 0;
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->segment_in_use) {
if (page->segment_in_use) {
pages_used++;
// ensure used count is up to date and collect potential concurrent frees
_mi_page_free_collect(page, false);
_mi_page_free_collect(page, false);
if (mi_page_all_free(page)) {
// if everything free already, page can be reused for some block size
// note: don't clear the page yet as we can only OS reset it once it is reclaimed
pages_used_empty++;
has_page = true;
}
else if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
else if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
// a page has available free blocks of the right size
has_page = true;
}
@ -1058,7 +1058,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
// whole empty page
has_page = true;
}
}
}
mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty);
if (all_pages_free != NULL) {
*all_pages_free = ((pages_used - pages_used_empty) == 0);
@ -1107,7 +1107,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
}
}
else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet
// note: do not reset as this includes pages that were not touched before
// mi_pages_reset_add(segment, page, tld);
@ -1148,17 +1148,17 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
// free the segment (by forced reclaim) to make it available to other threads.
// note1: we prefer to free a segment as that might lead to reclaiming another
// segment that is still partially used.
// note2: we could in principle optimize this by skipping reclaim and directly
// note2: we could in principle optimize this by skipping reclaim and directly
// freeing but that would violate some invariants temporarily)
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else if (has_page && segment->page_kind == page_kind) {
// found a free page of the right kind, or page of the right block_size with free space
// found a free page of the right kind, or page of the right block_size with free space
// we return the result of reclaim (which is usually `segment`) as it might free
// the segment due to concurrent frees (in which case `NULL` is returned).
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
}
else if (segment->abandoned_visits >= 3) {
else if (segment->abandoned_visits >= 3) {
// always reclaim on 3rd visit to limit the list length.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
@ -1172,12 +1172,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
/* -----------------------------------------------------------
Reclaim or allocate
Reclaim or allocate
----------------------------------------------------------- */
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(page_kind <= MI_PAGE_LARGE);
mi_assert_internal(page_kind <= MI_PAGE_LARGE);
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
// 1. try to get a segment from our cache
mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
@ -1198,7 +1198,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s
return segment;
}
// 3. otherwise allocate a fresh segment
return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld);
return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld);
}
@ -1223,11 +1223,11 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t*
// Allocate a page inside a segment. Requires that the segment has free pages
// (asserted via `mi_segment_has_free`). Returns the result of
// `mi_segment_find_free` for this segment.
static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
  mi_assert_internal(mi_segment_has_free(segment));
  return mi_segment_find_free(segment, tld);
}
static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
// find an available segment in the segment free queue
mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld);
if (mi_segment_queue_is_empty(free_queue)) {
// possibly allocate or reclaim a fresh segment
@ -1312,7 +1312,7 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
}
/* -----------------------------------------------------------
Page allocation
Page allocation
----------------------------------------------------------- */
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {

View File

@ -188,7 +188,7 @@ static void test_stress(void) {
free_items(p);
}
}
mi_collect(false);
// mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
@ -206,7 +206,7 @@ static void leak(intptr_t tid) {
}
}
static void test_leak(void) {
static void test_leak(void) {
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &leak);
mi_collect(false);
@ -242,14 +242,14 @@ int main(int argc, char** argv) {
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
mi_stats_reset();
// mi_stats_reset();
#ifdef STRESS
test_stress();
#else
test_leak();
#endif
#endif
mi_collect(true);
// mi_collect(true);
mi_stats_print(NULL);
//bench_end_program();
return 0;
@ -262,7 +262,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress;
#include <windows.h>
// Win32 thread start routine: passes the thread parameter, converted to
// `intptr_t`, to the function currently stored in `thread_entry_fun` and
// returns 0 as the thread exit code.
static DWORD WINAPI thread_entry(LPVOID param) {
  thread_entry_fun((intptr_t)param);
  return 0;
}