first working tls on macOS using interpose; still slow

daan 2020-01-29 22:46:44 -08:00
parent b3dae128de
commit 03b363a1c2
9 changed files with 155 additions and 120 deletions

View File

@ -247,7 +247,7 @@ if (MI_BUILD_TESTS MATCHES "ON")
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-stress PRIVATE include)
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries})
target_link_libraries(mimalloc-test-stress PRIVATE mimalloc ${mi_libraries})
enable_testing()
add_test(test_api, mimalloc-test-api)

View File

@ -33,7 +33,7 @@ terms of the MIT license. A copy of the license can be found in the file
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#endif
@ -51,6 +51,7 @@ void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
uintptr_t _os_random_weak(uintptr_t extra_seed);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c
@ -233,7 +234,7 @@ static inline size_t _mi_wsize_from_size(size_t size) {
// Overflow detecting multiply
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#include <limits.h> // UINT_MAX, ULONG_MAX
#if (SIZE_MAX == UINT_MAX)
@ -274,18 +275,24 @@ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value o
extern mi_heap_t _mi_heap_main; // statically allocated main backing heap
extern bool _mi_process_is_initialized;
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef MI_TLS_RECURSE_GUARD
extern mi_heap_t* _mi_get_default_heap_tls_safe(void);
static inline mi_heap_t* mi_get_default_heap(void) {
// on some BSD platforms, like macOS, the dynamic loader calls `malloc`
// to initialize thread local data. To avoid recursion, we need to avoid
// accessing the thread local `_mi_heap_default` until our module is loaded
// and use the statically allocated main heap until that time.
// TODO: patch ourselves dynamically to avoid this check every time?
if (!_mi_process_is_initialized) return &_mi_heap_main;
#endif
return _mi_get_default_heap_tls_safe();
#else
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
static inline mi_heap_t* mi_get_default_heap(void) {
return _mi_heap_default;
#endif
}
static inline bool mi_heap_is_default(const mi_heap_t* heap) {
@ -302,6 +309,7 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
}
static inline uintptr_t _mi_ptr_cookie(const void* p) {
mi_assert_internal(_mi_heap_main.cookie != 0);
return ((uintptr_t)p ^ _mi_heap_main.cookie);
}
@ -345,7 +353,7 @@ static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, con
// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
uintptr_t idx = _mi_segment_page_idx_of(segment, p);
return &((mi_segment_t*)segment)->pages[idx];
}
@ -411,14 +419,14 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t*
return mi_tf_make(block, mi_tf_delayed(tf));
}
// are all blocks in a page freed?
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
static inline bool mi_page_all_free(const mi_page_t* page) {
mi_assert_internal(page != NULL);
return (page->used == 0);
}
// are there any available blocks?
static inline bool mi_page_has_any_available(const mi_page_t* page) {
mi_assert_internal(page != NULL && page->reserved > 0);
return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
@ -466,11 +474,11 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
/* -------------------------------------------------------------------
Encoding/Decoding the free list next pointers
This is to protect against buffer overflow exploits where the
free list is mutated. Many hardened allocators xor the next pointer `p`
with a secret key `k1`, as `p^k1`. This prevents overwriting with known
values but might still be too weak: if the attacker can guess
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
Moreover, if multiple blocks can be read as well, the attacker can
xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
about the pointers (and subsequently `k1`).
@ -478,9 +486,9 @@ about the pointers (and subsequently `k1`).
Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
Since these operations are not associative, the above approaches do not
work so well any more even if the `p` can be guesstimated. For example,
for the read case we can subtract two entries to discard the `+k1` term,
but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
We include the left-rotation since xor and addition are otherwise linear
in the lowest bit. Finally, both keys are unique per page which reduces
the re-use of keys by a large factor.
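
For illustration, the scheme can be captured in a few lines of standalone C. The helper names `encode_ptr`/`decode_ptr` and the plain key parameters are illustrative only; in mimalloc the keys live in the page and the rotation helpers are internal.

#include <stdint.h>

// Illustrative sketch (not part of the commit): encode as ((p^k2)<<<k1)+k1
// and invert it by undoing each step in reverse order.
static inline uintptr_t rotl_bits(uintptr_t x, uintptr_t shift) {
  const uintptr_t bits = sizeof(uintptr_t)*8;
  shift %= bits;
  return (shift==0 ? x : (x << shift) | (x >> (bits - shift)));
}
static inline uintptr_t rotr_bits(uintptr_t x, uintptr_t shift) {
  const uintptr_t bits = sizeof(uintptr_t)*8;
  shift %= bits;
  return (shift==0 ? x : (x >> shift) | (x << (bits - shift)));
}
static inline uintptr_t encode_ptr(const void* p, uintptr_t k1, uintptr_t k2) {
  return rotl_bits((uintptr_t)p ^ k2, k1) + k1;
}
static inline void* decode_ptr(uintptr_t e, uintptr_t k1, uintptr_t k2) {
  return (void*)(rotr_bits(e - k1, k1) ^ k2);
}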

View File

@ -41,6 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE)
static void mi_free_tls_safe(void* p) {
if (mi_unlikely(_mi_preloading())) return;
mi_free(p);
}
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
@ -54,7 +58,7 @@ terms of the MIT license. A copy of the license can be found in the file
MI_INTERPOSE_MI(malloc),
MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(free),
MI_INTERPOSEX(free,mi_free_tls_safe),
MI_INTERPOSE_MI(strdup),
MI_INTERPOSE_MI(strndup)
};
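
For readers unfamiliar with the mechanism: dyld interposition works by emitting replacement/target pairs into a `__DATA,__interpose` section of the dylib. A minimal self-contained sketch (the names `my_interpose_s`, `my_malloc`, and the counter are illustrative, not mimalloc's macros) looks roughly like this:

#include <stdlib.h>

// Illustrative sketch (not part of the commit): each entry pairs a
// replacement function with the system function it replaces.
struct my_interpose_s {
  const void* replacement;
  const void* target;
};

static size_t my_malloc_count = 0;
static void* my_malloc(size_t size) {
  my_malloc_count++;    // e.g. count allocations
  return malloc(size);  // dyld does not apply the interpose tuples to the
                        // image that defines them, so this still reaches
                        // the system malloc
}

__attribute__((used)) static const struct my_interpose_s my_interposes[]
  __attribute__((section("__DATA,__interpose"))) = {
  { (const void*)&my_malloc, (const void*)&malloc },
};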
@ -194,4 +198,3 @@ int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_me
#endif
#endif // MI_MALLOC_OVERRIDE && !_WIN32

View File

@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
mi_block_t* block = page->free;
if (mi_unlikely(block == NULL)) {
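
The hunk is cut off here; the fast path it describes is essentially a free-list pop. A simplified sketch, ignoring the assertions, statistics, and padding handling of the real `_mi_page_malloc` (assumes the mimalloc-internal definitions):

// Illustrative sketch (not part of the commit) of the fast path: pop the
// head of the page-local free list, or fall back to the generic path.
static inline void* page_malloc_sketch(mi_heap_t* heap, mi_page_t* page, size_t size) {
  mi_block_t* block = page->free;
  if (mi_unlikely(block == NULL)) {
    return _mi_malloc_generic(heap, size);  // slow path: refill the list or pick another page
  }
  page->free = mi_block_next(page, block);  // advance the (encoded) free list
  page->used++;
  return block;
}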

View File

@ -104,9 +104,9 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
static mi_tld_t tld_main = {
0, false,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, 0, NULL,
tld_main_stats, tld_main_os
}, // segments
{ 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
@ -124,9 +124,9 @@ mi_heap_t _mi_heap_main = {
MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL),
0, // thread id
MI_INIT_COOKIE, // initial cookie
{ MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0}, {0}, 0 }, // random
0, // initial cookie
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0 }, // random
0, // page count
false // can reclaim
};
@ -148,14 +148,15 @@ typedef struct mi_thread_data_s {
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(_mi_heap_default)) return true;
if (mi_heap_is_initialized(mi_get_default_heap())) return true;
if (_mi_is_main_thread()) {
mi_assert_internal(_mi_heap_main.thread_id != 0);
// the main heap is statically allocated
_mi_heap_set_default_direct(&_mi_heap_main);
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
//mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
if (td == NULL) {
_mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n");
@ -170,7 +171,7 @@ static bool _mi_heap_init(void) {
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->tld = tld;
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
@ -265,8 +266,9 @@ static void _mi_thread_done(mi_heap_t* default_heap);
#endif
// Set up handlers so `mi_thread_done` is called automatically
static bool tls_initialized = false; // fine if it races
static void mi_process_setup_auto_thread_done(void) {
static bool tls_initialized = false; // fine if it races
if (tls_initialized) return;
tls_initialized = true;
#if defined(_WIN32) && defined(MI_SHARED_LIB)
@ -317,7 +319,9 @@ static void _mi_thread_done(mi_heap_t* heap) {
void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
#ifndef MI_TLS_RECURSE_GUARD
_mi_heap_default = heap;
#endif
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
@ -330,7 +334,11 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
#endif
}
mi_heap_t* _mi_get_default_heap_tls_safe(void) {
if (mi_unlikely(mi_pthread_key==0)) return (mi_heap_t*)&_mi_heap_empty;
mi_heap_t* heap = pthread_getspecific(mi_pthread_key);
return (mi_likely(heap!=NULL) ? heap : (mi_heap_t*)&_mi_heap_empty);
}
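
The `mi_pthread_key` read here is created during thread setup, which this hunk does not show. A hedged sketch of the usual pattern, where the key's destructor triggers the per-thread cleanup:

#include <pthread.h>

// Illustrative sketch (not part of the commit); the actual key creation and
// destructor live in mimalloc's thread-setup code.
static pthread_key_t mi_pthread_key_sketch;

static void pthread_done_sketch(void* value) {
  if (value != NULL) _mi_thread_done((mi_heap_t*)value);  // per-thread heap cleanup
}

static void pthread_key_init_sketch(void) {
  pthread_key_create(&mi_pthread_key_sketch, &pthread_done_sketch);
}

// _mi_heap_set_default_direct presumably stores the heap with
// pthread_setspecific(mi_pthread_key, heap), so the getter above can find it
// and the destructor fires on thread exit.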
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
@ -339,6 +347,7 @@ static void mi_process_done(void);
static bool os_preloading = true; // true until this module is initialized
static bool mi_redirected = false; // true if malloc redirects to mi_malloc
bool _mi_tls_initialized = false;
// Returns true if this module has not been initialized; Don't use C runtime routines until it returns false.
bool _mi_preloading() {
@ -383,7 +392,10 @@ static void mi_allocator_done() {
// Called once by the process loader
static void mi_process_load(void) {
volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true;
UNUSED(dummy);
os_preloading = false;
_mi_tls_initialized = true;
atexit(&mi_process_done);
_mi_options_init();
mi_process_init();
@ -398,26 +410,26 @@ static void mi_process_load(void) {
}
}
void _mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = _os_random_weak((uintptr_t)&_mi_heap_main_init);
_mi_random_init(&_mi_heap_main.random);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
}
}
// Initialize the process; called by thread_init or the process loader
void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
if (_mi_process_is_initialized) return;
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
_mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
#endif
mi_process_setup_auto_thread_done();
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
_mi_os_init();
_mi_heap_main_init();
#if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif

View File

@ -53,7 +53,7 @@ static mi_option_desc_t options[_mi_option_last] =
// stable options
{ MI_DEBUG, UNINIT, MI_OPTION(show_errors) },
{ 0, UNINIT, MI_OPTION(show_stats) },
{ 0, UNINIT, MI_OPTION(verbose) },
{ 1, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense.
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand
@ -239,16 +239,30 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT
// inside the C runtime causes another message.
static mi_decl_thread bool recurse = false;
static bool mi_recurse_enter(void) {
#ifdef MI_TLS_RECURSE_GUARD
if (_mi_preloading()) return true;
#endif
if (recurse) return false;
recurse = true;
return true;
}
static void mi_recurse_exit(void) {
#ifdef MI_TLS_RECURSE_GUARD
if (_mi_preloading()) return;
#endif
recurse = false;
}
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
if (recurse) return;
if (!mi_recurse_enter()) return;
if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
out = mi_out_get_default(&arg);
}
recurse = true;
if (prefix != NULL) out(prefix,arg);
out(message,arg);
recurse = false;
return;
mi_recurse_exit();
}
// Define our own limited `fprintf` that avoids memory allocation.
@ -256,14 +270,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
if (fmt==NULL) return;
if (recurse) return;
recurse = true;
if (!mi_recurse_enter()) return;
vsnprintf(buf,sizeof(buf)-1,fmt,args);
recurse = false;
mi_recurse_exit();
_mi_fputs(out,arg,prefix,buf);
}
void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
va_list args;
va_start(args,fmt);
@ -290,7 +302,7 @@ void _mi_verbose_message(const char* fmt, ...) {
static void mi_show_error_message(const char* fmt, va_list args) {
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args);
}
void _mi_warning_message(const char* fmt, ...) {

View File

@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
and to avoid implementations that use a lock. We only use the OS provided
random source to initialize the initial seeds. Since we do not need ultimate
performance but we do rely on the security (for secret cookies in secure mode)
we use a cryptographically secure generator (chacha20).
@ -21,11 +21,11 @@ we use a cryptographically secure generator (chacha20).
/* ----------------------------------------------------------------------------
Chacha20 implementation as the original algorithm with a 64-bit nonce
and counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position 0 to 3: constant key
Position 4 to 11: the key
Position 12 to 13: the counter.
Position 14 to 15: the nonce.
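
The layout matches the standard ChaCha state; as a small sketch, filling the 16-word input matrix from a key, counter, and nonce looks like this (the constants are the usual "expand 32-byte k" words; `chacha_init` below builds the same matrix from the byte-level key):

#include <stdint.h>
#include <string.h>

// Illustrative sketch (not part of the commit) of the state layout above.
static void chacha_state_sketch(uint32_t input[16], const uint32_t key32[8],
                                uint64_t counter, uint64_t nonce) {
  input[0] = 0x61707865; input[1] = 0x3320646e;  // "expa", "nd 3"
  input[2] = 0x79622d32; input[3] = 0x6b206574;  // "2-by", "te k"
  memcpy(&input[4], key32, 8*sizeof(uint32_t));  // positions 4..11: the key
  input[12] = (uint32_t)counter;                 // positions 12..13: the counter
  input[13] = (uint32_t)(counter >> 32);
  input[14] = (uint32_t)nonce;                   // positions 14..15: the nonce
  input[15] = (uint32_t)(nonce >> 32);
}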
@ -44,8 +44,8 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
{
// scramble into `x`
uint32_t x[16];
for (size_t i = 0; i < 16; i++) {
@ -72,8 +72,8 @@ static void chacha_block(mi_random_ctx_t* ctx)
ctx->input[12] += 1;
if (ctx->input[12] == 0) {
ctx->input[13] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
}
}
}
@ -83,7 +83,7 @@ static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
chacha_block(ctx);
ctx->output_available = 16; // (assign again to suppress static analysis warning)
}
const uint32_t x = ctx->output[16 - ctx->output_available];
ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
ctx->output_available--;
return x;
@ -94,9 +94,9 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) {
return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian but we do anyways
// just for being compatible :-)
memset(ctx, 0, sizeof(*ctx));
@ -110,7 +110,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
ctx->input[12] = 0;
ctx->input[13] = 0;
ctx->input[14] = (uint32_t)nonce;
ctx->input[15] = (uint32_t)(nonce >> 32);
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
@ -184,7 +184,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__linux__)
#include <sys/syscall.h>
#include <unistd.h>
#include <sys/types.h>
@ -241,8 +241,8 @@ static bool os_random_buf(void* buf, size_t buf_len) {
#include <time.h>
#endif
static uintptr_t os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
uintptr_t _os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
@ -267,10 +267,10 @@ static uintptr_t os_random_weak(uintptr_t extra_seed) {
void _mi_random_init(mi_random_ctx_t* ctx) {
uint8_t key[32];
if (!os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0);
uintptr_t x = _os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
@ -280,7 +280,7 @@ void _mi_random_init(mi_random_ctx_t* ctx) {
}
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {

View File

@ -17,9 +17,9 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_
/* --------------------------------------------------------------------------------
Segment allocation
We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid
splitting VMA's on Linux and reduce fragmentation on other OS's.
Each thread owns its own segments.
Currently we have:
- small pages (64kb), 64 in one segment
@ -154,14 +154,14 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t*
for (size_t i = 0; i < segment->capacity; i++) {
const mi_page_t* const page = &segment->pages[i];
if (!page->segment_in_use) {
nfree++;
}
if (page->segment_in_use || page->is_reset) {
mi_assert_expensive(!mi_pages_reset_contains(page, tld));
}
}
mi_assert_internal(nfree + segment->used == segment->capacity);
mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
// mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
(mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
return true;
@ -286,7 +286,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen
mi_assert_expensive(!mi_pages_reset_contains(page, tld));
mi_assert_internal(_mi_page_segment(page)==segment);
if (!mi_option_is_enabled(mi_option_page_reset)) return;
if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return;
if (mi_option_get(mi_option_reset_delay) == 0) {
// reset immediately?
@ -295,7 +295,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen
else {
// otherwise push on the delayed page reset queue
mi_page_queue_t* pq = &tld->pages_reset;
// push on top
mi_page_reset_set_expire(page);
page->next = pq->first;
page->prev = NULL;
@ -316,7 +316,7 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) {
mi_page_queue_t* pq = &tld->pages_reset;
mi_assert_internal(pq!=NULL);
mi_assert_internal(!page->segment_in_use);
mi_assert_internal(mi_pages_reset_contains(page, tld));
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == pq->last) pq->last = page->prev;
@ -332,19 +332,19 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for
if (!page->segment_in_use && page->is_committed && !page->is_reset) {
mi_pages_reset_remove(page, tld);
if (force_reset) {
mi_page_reset(segment, page, 0, tld);
}
}
else {
mi_assert_internal(mi_page_not_in_queue(page,tld));
}
}
}
static void mi_reset_delayed(mi_segments_tld_t* tld) {
if (!mi_option_is_enabled(mi_option_page_reset)) return;
mi_msecs_t now = _mi_clock_now();
mi_page_queue_t* pq = &tld->pages_reset;
// from oldest up to the first that has not expired yet
mi_page_t* page = pq->last;
while (page != NULL && mi_page_reset_is_expired(page,now)) {
@ -358,7 +358,7 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) {
pq->last = page;
if (page != NULL){
page->next = NULL;
}
else {
pq->first = NULL;
}
@ -540,7 +540,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
}
mi_assert_internal(tld->cache_count == 0);
mi_assert_internal(tld->cache == NULL);
#if MI_DEBUG>=2
if (!_mi_is_main_thread()) {
mi_assert_internal(tld->pages_reset.first == NULL);
mi_assert_internal(tld->pages_reset.last == NULL);
@ -684,7 +684,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
UNUSED(force);
mi_assert(segment != NULL);
// note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse)
bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset));
mi_pages_reset_remove_all_in_segment(segment, force_reset, tld);
@ -716,7 +716,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) {
static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(_mi_page_segment(page) == segment);
mi_assert_internal(!page->segment_in_use);
// set in-use before doing unreset to prevent delayed reset
mi_pages_reset_remove(page, tld);
page->segment_in_use = true;
@ -756,7 +756,7 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
// clear page data; can be called on abandoned segments
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld)
{
mi_assert_internal(page->segment_in_use);
mi_assert_internal(mi_page_all_free(page));
@ -787,7 +787,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a
segment->used--;
// add to the free page list for reuse/reset
if (allow_reset) {
mi_pages_reset_add(segment, page, tld);
}
}
@ -841,12 +841,12 @@ Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
----------------------------------------------------------- */
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
@ -862,7 +862,7 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
}
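
Only the tail of `mi_tagged_segment` is visible in this hunk; the idea is to pack the segment pointer and a small counter into one word. A sketch under the assumption that segments are `MI_SEGMENT_SIZE`-aligned (so the low bits are free for the tag), with illustrative names:

// Illustrative sketch (not part of the commit): the tag occupies the low
// alignment bits and is bumped on every update, so a recycled pointer value
// still fails a compare-and-swap (avoiding the A-B-A problem).
static mi_segment_t* tagged_ptr_sketch(mi_tagged_segment_t ts) {
  return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
}

static mi_tagged_segment_t tagged_make_sketch(mi_segment_t* segment, mi_tagged_segment_t prev) {
  uintptr_t tag = ((prev & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
  return ((uintptr_t)segment | tag);
}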
// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
@ -888,7 +888,7 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) {
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
@ -954,12 +954,12 @@ static mi_segment_t* mi_abandoned_pop(void) {
segment = mi_tagged_segment_ptr(ts);
if (mi_likely(segment == NULL)) {
if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment to be decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `memory.c:_mi_mem_free` for example)
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
@ -1024,7 +1024,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
----------------------------------------------------------- */
// Possibly clear pages and check if free space is available
static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
bool has_page = false;
@ -1032,17 +1032,17 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
size_t pages_used_empty = 0;
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->segment_in_use) {
pages_used++;
// ensure used count is up to date and collect potential concurrent frees
_mi_page_free_collect(page, false);
if (mi_page_all_free(page)) {
// if everything free already, page can be reused for some block size
// note: don't clear the page yet as we can only OS reset it once it is reclaimed
pages_used_empty++;
has_page = true;
}
else if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
// a page has available free blocks of the right size
has_page = true;
}
@ -1051,7 +1051,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool
// whole empty page
has_page = true;
}
}
mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty);
if (all_pages_free != NULL) {
*all_pages_free = ((pages_used - pages_used_empty) == 0);
@ -1100,7 +1100,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
}
else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet
// note: do not reset as this includes pages that were not touched before
// mi_pages_reset_add(segment, page, tld);
@ -1141,17 +1141,17 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
// free the segment (by forced reclaim) to make it available to other threads.
// note1: we prefer to free a segment as that might lead to reclaiming another
// segment that is still partially used.
// note2: we could in principle optimize this by skipping reclaim and directly
// freeing but that would violate some invariants temporarily)
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else if (has_page && segment->page_kind == page_kind) {
// found a free page of the right kind, or page of the right block_size with free space
// we return the result of reclaim (which is usually `segment`) as it might free
// the segment due to concurrent frees (in which case `NULL` is returned).
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
}
else if (segment->abandoned_visits >= 3) {
// always reclaim on 3rd visit to limit the list length.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
@ -1165,12 +1165,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size,
/* -----------------------------------------------------------
Reclaim or allocate
----------------------------------------------------------- */
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(page_kind <= MI_PAGE_LARGE);
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
// 1. try to get a segment from our cache
mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
@ -1191,7 +1191,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s
return segment;
}
// 3. otherwise allocate a fresh segment
return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld);
}
@ -1216,11 +1216,11 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t*
// Allocate a page inside a segment. Requires that the page has free pages
static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment));
return mi_segment_find_free(segment, tld);
}
static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
// find an available segment in the segment free queue
mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld);
if (mi_segment_queue_is_empty(free_queue)) {
// possibly allocate or reclaim a fresh segment
@ -1275,7 +1275,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
}
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {

View File

@ -20,7 +20,7 @@ terms of the MIT license.
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <mimalloc.h>
// #include <mimalloc.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
@ -38,7 +38,7 @@ static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC
#ifndef USE_STD_MALLOC
#define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
@ -188,7 +188,7 @@ static void test_stress(void) {
free_items(p);
}
}
mi_collect(false);
// mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
@ -206,7 +206,7 @@ static void leak(intptr_t tid) {
}
}
static void test_leak(void) {
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &leak);
mi_collect(false);
@ -242,15 +242,15 @@ int main(int argc, char** argv) {
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
mi_stats_reset();
// mi_stats_reset();
#ifdef STRESS
test_stress();
#else
test_leak();
#endif
mi_collect(true);
mi_stats_print(NULL);
// mi_collect(true);
// mi_stats_print(NULL);
//bench_end_program();
return 0;
}
@ -262,7 +262,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress;
#include <windows.h>
static DWORD WINAPI thread_entry(LPVOID param) {
thread_entry_fun((intptr_t)param);
return 0;
}