merge from dev (visit abandoned, upstream of python/cpython#114133)

Daan 2024-06-02 17:03:13 -07:00
commit f77adf4a18
21 changed files with 755 additions and 315 deletions

View File

@ -263,7 +263,7 @@ typedef struct mi_heap_area_s {
typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// Experimental
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
@ -289,8 +289,25 @@ mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_co
mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
#endif
// Experimental: allow sub-processes whose memory segments stay separated (and no reclamation between them)
// Used, for example, for separate interpreters in one process.
typedef void* mi_subproc_id_t;
mi_decl_export mi_subproc_id_t mi_subproc_main(void);
mi_decl_export mi_subproc_id_t mi_subproc_new(void);
mi_decl_export void mi_subproc_delete(mi_subproc_id_t subproc);
mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)
// Experimental: visit abandoned heap areas (from threads that have been terminated)
mi_decl_export bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// Experimental: create a new heap with a specified heap tag. Set `allow_destroy` to false to allow the thread
// to reclaim abandoned memory (with a compatible heap_tag and arena_id) but in that case `mi_heap_destroy` will
// fall back to `mi_heap_delete`.
mi_decl_export mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id);
// deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
// ------------------------------------------------------
@ -348,6 +365,7 @@ typedef enum mi_option_e {
mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's)
mi_option_retry_on_oom, // retry on out-of-memory for N milliseconds (=400), set to 0 to disable retries. (only on windows)
mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0)
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
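As a rough usage sketch of the experimental API above (the visitor, the worker function, and the heap tag value are hypothetical; it assumes the `visit_abandoned` option is enabled at startup, e.g. `MIMALLOC_VISIT_ABANDONED=ON`, and that passing arena id 0 means "no specific arena"):

#include <mimalloc.h>

// Hypothetical visitor: count the live blocks that terminated threads left behind.
static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
                         void* block, size_t block_size, void* arg) {
  (void)heap; (void)area; (void)block_size;
  if (block != NULL) { (*(size_t*)arg)++; }
  return true;  // keep visiting
}

// Hypothetical thread entry: bind the thread to a sub-process before it allocates anything.
static void worker_start(mi_subproc_id_t sub) {
  mi_subproc_add_current_thread(sub);
  mi_heap_t* heap = mi_heap_new_ex(1 /* heap tag */, false /* allow reclaim, disallow mi_heap_destroy */, 0 /* no specific arena */);
  (void)mi_heap_malloc(heap, 64);  // intentionally not freed: becomes abandoned when the thread exits
}

// In the owning code, the flow would be roughly:
//   mi_subproc_id_t sub = mi_subproc_new();
//   ... spawn threads that call worker_start(sub), let them terminate ...
//   size_t live = 0;
//   mi_abandoned_visit_blocks(sub, 1 /* heap tag */, true /* visit blocks */, &count_blocks, &live);
//   mi_subproc_delete(sub);  // only releases the sub-process once no abandoned OS segments remain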

View File

@ -8,6 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
// include windows.h or pthreads.h
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#elif !defined(__wasi__) && (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
@ -24,9 +35,9 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#if (__cplusplus >= 202002L) // c++20, see issue #571
#define MI_ATOMIC_VAR_INIT(x) x
#elif !defined(ATOMIC_VAR_INIT)
#define MI_ATOMIC_VAR_INIT(x) x
#else
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
@ -133,10 +144,6 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
#elif defined(_MSC_VER)
// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
@ -302,11 +309,16 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p, -sub);
}
// ----------------------------------------------------------------------
// Once and Guard
// ----------------------------------------------------------------------
typedef _Atomic(uintptr_t) mi_atomic_once_t;
// Returns true only on the first invocation
static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1
}
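// For illustration (not part of this change): a one-time initialization guard could be
//   static mi_atomic_once_t my_once;                     // hypothetical name
//   if (mi_atomic_once(&my_once)) { /* executed by exactly one thread, exactly once */ }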
@ -322,17 +334,16 @@ typedef _Atomic(uintptr_t) mi_atomic_guard_t;
// ----------------------------------------------------------------------
// Yield
// ----------------------------------------------------------------------
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline void mi_atomic_yield(void) {
YieldProcessor();
}
@ -390,4 +401,107 @@ static inline void mi_atomic_yield(void) {
#endif
// ----------------------------------------------------------------------
// Locks are only used for abandoned segment visiting in `arena.c`
// ----------------------------------------------------------------------
#if defined(_WIN32)
#define mi_lock_t CRITICAL_SECTION
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return TryEnterCriticalSection(lock);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
EnterCriticalSection(lock);
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
LeaveCriticalSection(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
InitializeCriticalSection(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
DeleteCriticalSection(lock);
}
#elif defined(MI_USE_PTHREADS)
#define mi_lock_t pthread_mutex_t
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
return (pthread_mutex_lock(lock) == 0);
}
static inline void mi_lock_release(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
pthread_mutex_init(lock, NULL);
}
static inline void mi_lock_done(mi_lock_t* lock) {
pthread_mutex_destroy(lock);
}
/*
#elif defined(__cplusplus)
#include <mutex>
#define mi_lock_t std::mutex
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return lock->try_lock();
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
lock->lock();
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
lock->unlock();
}
static inline void mi_lock_init(mi_lock_t* lock) {
(void)(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
*/
#else
// fall back to poor man's locks.
// this should only be the case in a single-threaded environment (like __wasi__)
#define mi_lock_t _Atomic(uintptr_t)
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
for (int i = 0; i < 1000; i++) { // for at most 1000 tries?
if (mi_lock_try_acquire(lock)) return true;
mi_atomic_yield();
}
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
mi_atomic_store_release(lock, (uintptr_t)0);
}
static inline void mi_lock_init(mi_lock_t* lock) {
mi_lock_release(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
#endif
#endif // __MIMALLOC_ATOMIC_H
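A minimal usage sketch of the lock abstraction above, assuming only the functions defined in this header (all names in the sketch are hypothetical), independent of whether `mi_lock_t` maps to CRITICAL_SECTION, pthread_mutex_t, or the atomic fallback:

static mi_lock_t demo_lock;     // hypothetical shared lock
static size_t    demo_counter;  // hypothetical shared state

static void demo_init(void) {
  mi_lock_init(&demo_lock);
}

static bool demo_increment(bool may_block) {
  // regular reclaim uses the try-variant so it never blocks; full visiting uses mi_lock_acquire
  const bool ok = (may_block ? mi_lock_acquire(&demo_lock) : mi_lock_try_acquire(&demo_lock));
  if (!ok) return false;
  demo_counter++;
  mi_lock_release(&demo_lock);
  return true;
}

static void demo_done(void) {
  mi_lock_done(&demo_lock);
}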

View File

@ -53,11 +53,6 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_externc
#endif
// pthreads
#if !defined(_WIN32) && !defined(__wasi__)
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// "options.c"
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
@ -84,11 +79,12 @@ extern mi_decl_cache_align const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
size_t _mi_current_thread_count(void);
bool _mi_preloading(void); // true while the C runtime is not initialized yet
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
void _mi_thread_done(mi_heap_t* heap);
void _mi_thread_data_collect(void);
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
// os.c
void _mi_os_init(void); // called from process init
@ -131,15 +127,18 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats);
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment);
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
size_t _mi_arena_segment_abandoned_count(void);
typedef struct mi_arena_field_cursor_s { // abstract
mi_arena_id_t start;
int count;
void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid);
void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size);
typedef struct mi_arena_field_cursor_s { // abstract struct
size_t start;
size_t end;
size_t bitmap_idx;
mi_subproc_t* subproc;
} mi_arena_field_cursor_t;
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all);
// "segment-map.c"
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
@ -163,6 +162,7 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);
bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);
// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
@ -194,6 +194,8 @@ void _mi_heap_set_default_direct(mi_heap_t* heap);
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
void _mi_heap_unsafe_destroy_all(void);
mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag);
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg);
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
@ -349,6 +351,14 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
return (divider == 0 ? size : ((size + divider - 1) / divider));
}
// clamp an integer
static inline size_t _mi_clamp(size_t sz, size_t min, size_t max) {
if (sz < min) return min;
else if (sz > max) return max;
else return sz;
}
// Is memory zero initialized?
static inline bool mi_mem_is_zero(const void* p, size_t size) {
for (size_t i = 0; i < size; i++) {

View File

@ -115,6 +115,7 @@ void _mi_prim_thread_done_auto_done(void);
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
//-------------------------------------------------------------------
// Thread id: `_mi_prim_thread_id()`
//
@ -235,10 +236,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
@ -370,4 +367,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) {
#endif // MIMALLOC_PRIM_H

View File

@ -34,7 +34,7 @@ The corresponding `mi_track_free` still uses the block start pointer and origina
The `mi_track_resize` is currently unused but could be called on reallocations within a block.
`mi_track_init` is called at program start.
The following macros are for tools like asan and valgrind to track whether memory is
defined, undefined, or not accessible at all:
#define mi_track_mem_defined(p,size)
@ -82,10 +82,6 @@ defined, undefined, or not accessible at all:
#define MI_TRACK_HEAP_DESTROY 1
#define MI_TRACK_TOOL "ETW"
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include "../src/prim/windows/etw.h"
#define mi_track_init() EventRegistermicrosoft_windows_mimalloc();
@ -96,7 +92,7 @@ defined, undefined, or not accessible at all:
// no tracking
#define MI_TRACK_ENABLED 0
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_TOOL "none"
#define mi_track_malloc_size(p,reqsize,size,zero)

View File

@ -319,7 +319,7 @@ typedef struct mi_page_s {
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t heap_tag; // tag of the owning heap, used for separated heaps by object type
uint8_t heap_tag; // tag of the owning heap, used to separate heaps by object type
// padding
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the page area containing the blocks
@ -430,7 +430,7 @@ typedef struct mi_memid_s {
// -----------------------------------------------------------------------------------------
// Segments are large allocated memory blocks (8mb on 64 bit) from arenas or the OS.
// Segments are large allocated memory blocks (32mb on 64 bit) from arenas or the OS.
//
// Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks.
// The start of a segment is this structure with a fixed number of slice entries (`slices`)
@ -442,6 +442,9 @@ typedef struct mi_memid_s {
// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`).
// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while
// large and huge pages span a variable amount of slices.
typedef struct mi_subproc_s mi_subproc_t;
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // memory id for arena/OS allocation
@ -462,6 +465,10 @@ typedef struct mi_segment_s {
size_t abandoned_visits; // count how often this segment is visited during abandoned reclamation (to force reclaim if it takes too long)
size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
mi_subproc_t* subproc; // segment belongs to sub process
struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled
struct mi_segment_s* abandoned_os_prev;
size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages.
@ -658,6 +665,18 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
// ------------------------------------------------------
// Sub processes do not reclaim or visit segments
// from other sub processes
// ------------------------------------------------------
struct mi_subproc_s {
_Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process
mi_lock_t abandoned_os_lock; // lock for the abandoned segments outside of arena's
mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
mi_memid_t memid; // provenance
};
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------
@ -687,8 +706,9 @@ typedef struct mi_segments_tld_s {
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
size_t reclaim_count;// number of reclaimed (abandoned) segments
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_stats_t* stats; // points to tld stats
mi_os_tld_t* os; // points to os stats
mi_os_tld_t* os; // points to os tld
} mi_segments_tld_t;
// Thread local data

View File

@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
{
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
mi_block_t* const block = page->free;
@ -125,7 +125,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
#endif
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
mi_track_malloc(p,size,zero);
#if MI_STAT>1
@ -362,7 +362,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_
#ifndef PATH_MAX
#define PATH_MAX MAX_PATH
#endif
#include <windows.h>
mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
// todo: use GetFullPathNameW to allow longer file names
char buf[PATH_MAX];

View File

@ -36,27 +36,28 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
typedef uintptr_t mi_block_info_t;
#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN)
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB
#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
#define MI_MAX_ARENAS (132) // Limited as the reservation exponentially increases (and takes up .bss)
// A memory arena descriptor
typedef struct mi_arena_s {
mi_arena_id_t id; // arena id; 0 for non-specific
mi_memid_t memid; // memid of the memory area
_Atomic(uint8_t*) start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
size_t meta_size; // size of the arena structure itself (including its bitmaps)
mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
int numa_node; // associated NUMA node
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
mi_arena_id_t id; // arena id; 0 for non-specific
mi_memid_t memid; // memid of the memory area
_Atomic(uint8_t*) start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
size_t meta_size; // size of the arena structure itself (including its bitmaps)
mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation)
int numa_node; // associated NUMA node
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
// do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
} mi_arena_t;
@ -65,7 +66,6 @@ typedef struct mi_arena_s {
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept;
/* -----------------------------------------------------------
@ -175,7 +175,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m
return p;
}
static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) {
*memid = _mi_memid_none();
// try static
@ -183,7 +183,7 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st
if (p != NULL) return p;
// or fall back to the OS
p = _mi_os_alloc(size, memid, stats);
p = _mi_os_alloc(size, memid, &_mi_stats_main);
if (p == NULL) return NULL;
// zero the OS memory if needed
@ -194,9 +194,9 @@ static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* st
return p;
}
static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) {
void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) {
if (mi_memkind_is_os(memid.memkind)) {
_mi_os_free(p, size, memid, stats);
_mi_os_free(p, size, memid, &_mi_stats_main);
}
else {
mi_assert(memid.memkind == MI_MEM_STATIC);
@ -361,8 +361,14 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example)
}
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE);
arena_reserve = _mi_align_up(arena_reserve, MI_SEGMENT_SIZE);
if (arena_count >= 8 && arena_count <= 128) {
arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially
// scale up the arena sizes exponentially every 8 entries (128 entries get to 589TiB)
const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16 );
size_t reserve = 0;
if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) {
arena_reserve = reserve;
}
}
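// Worked example (comment only): with 32 arenas the multiplier is 1 << _mi_clamp(32/8, 0, 16) == 16,
// and at the 128-arena limit it is clamped to 1 << 16; `mi_mul_overflow` keeps the scaled
// reserve from silently wrapping around where `size_t` is small.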
if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size
@ -507,7 +513,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
size_t bitidx = startidx;
bool all_purged = false;
while (bitidx < endidx) {
// count consequetive ones in the purge mask
// count consecutive ones in the purge mask
size_t count = 0;
while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) {
count++;
@ -544,11 +550,12 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi
if (purge != 0) {
size_t bitidx = 0;
while (bitidx < MI_BITMAP_FIELD_BITS) {
// find consequetive range of ones in the purge mask
// find consecutive range of ones in the purge mask
size_t bitlen = 0;
while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) {
bitlen++;
}
// temporarily claim the purge range as "in-use" to be thread-safe with allocation
// try to claim the longest range of corresponding in_use bits
const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx);
while( bitlen > 0 ) {
@ -698,6 +705,7 @@ static void mi_arenas_unsafe_destroy(void) {
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL) {
mi_lock_done(&arena->abandoned_visit_lock);
if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) {
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
_mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main);
@ -705,7 +713,7 @@ static void mi_arenas_unsafe_destroy(void) {
else {
new_max_arena = i;
}
mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main);
_mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size);
}
}
@ -730,7 +738,7 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
bool _mi_arena_contains(const void* p) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) {
return true;
}
@ -748,28 +756,38 @@ bool _mi_arena_contains(const void* p) {
the arena bitmaps.
----------------------------------------------------------- */
// Maintain a count of all abandoned segments
static mi_decl_cache_align _Atomic(size_t)abandoned_count;
size_t _mi_arena_segment_abandoned_count(void) {
return mi_atomic_load_relaxed(&abandoned_count);
}
// reclaim a specific abandoned segment; `true` on success.
// sets the thread_id.
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )
{
if (segment->memid.memkind != MI_MEM_ARENA) {
// not in an arena, consider it un-abandoned now.
// but we need to still claim it atomically -- we use the thread_id for that.
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
// not in an arena
// if abandoned visiting is allowed, we need to take a lock on the abandoned os list
bool has_lock = false;
if (mi_option_is_enabled(mi_option_visit_abandoned)) {
has_lock = mi_lock_try_acquire(&segment->subproc->abandoned_os_lock);
if (!has_lock) {
return false; // failed to acquire the lock, we just give up
}
}
// abandon it, but we need to still claim it atomically -- we use the thread_id for that.
bool reclaimed = false;
size_t expected = 0;
if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected, _mi_thread_id())) {
mi_atomic_decrement_relaxed(&abandoned_count);
return true;
}
else {
return false;
// reclaim
mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
reclaimed = true;
// and remove from the abandoned os list (if needed)
mi_segment_t* const next = segment->abandoned_os_next;
mi_segment_t* const prev = segment->abandoned_os_prev;
if (prev != NULL) { prev->abandoned_os_next = next; }
else { segment->subproc->abandoned_os_list = next; }
if (next != NULL) { next->abandoned_os_prev = prev; }
segment->abandoned_os_next = NULL;
segment->abandoned_os_prev = NULL;
}
if (has_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); }
return reclaimed;
}
// arena segment: use the blocks_abandoned bitmap.
size_t arena_idx;
@ -781,7 +799,7 @@ bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment )
bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
if (was_marked) {
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
mi_atomic_decrement_relaxed(&abandoned_count);
mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
// mi_assert_internal(was_marked);
@ -796,11 +814,30 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
{
mi_atomic_store_release(&segment->thread_id, 0);
mi_assert_internal(segment->used == segment->abandoned);
if (segment->memid.memkind != MI_MEM_ARENA) {
// not in an arena; count it as abandoned and return
mi_atomic_increment_relaxed(&abandoned_count);
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
// not in an arena; count it as abandoned and return (these can be reclaimed on a `free`)
mi_atomic_increment_relaxed(&segment->subproc->abandoned_count);
// if abandoned visiting is allowed, we need to take a lock on the abandoned os list to insert it
if (mi_option_is_enabled(mi_option_visit_abandoned)) {
if (!mi_lock_acquire(&segment->subproc->abandoned_os_lock)) {
_mi_error_message(EFAULT, "internal error: failed to acquire the abandoned (os) segment lock to mark abandonment");
}
else {
// push on the front of the list
mi_segment_t* next = segment->subproc->abandoned_os_list;
mi_assert_internal(next == NULL || next->abandoned_os_prev == NULL);
mi_assert_internal(segment->abandoned_os_prev == NULL);
mi_assert_internal(segment->abandoned_os_next == NULL);
if (next != NULL) { next->abandoned_os_prev = segment; }
segment->abandoned_os_prev = NULL;
segment->abandoned_os_next = next;
segment->subproc->abandoned_os_list = segment;
mi_lock_release(&segment->subproc->abandoned_os_lock);
}
}
return;
}
// segment is in an arena, mark it in the arena `blocks_abandoned` bitmap
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
@ -808,69 +845,156 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
mi_assert_internal(arena != NULL);
const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
if (was_unmarked) { mi_atomic_increment_relaxed(&abandoned_count); }
if (was_unmarked) { mi_atomic_increment_relaxed(&segment->subproc->abandoned_count); }
mi_assert_internal(was_unmarked);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
}
// start a cursor at a randomized arena
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) {
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, mi_arena_field_cursor_t* current) {
mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc);
current->bitmap_idx = 0;
current->subproc = subproc;
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
current->count = 0;
current->bitmap_idx = 0;
if (heap != NULL && heap->arena_id != _mi_arena_id_none()) {
// for a heap that is bound to one arena, only visit that arena
current->start = mi_arena_id_index(heap->arena_id);
current->end = current->start + 1;
}
else {
// otherwise visit all starting at a random location
current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena));
current->end = current->start + max_arena;
}
mi_assert_internal(current->start <= max_arena);
}
static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) {
// try to reclaim an abandoned segment in the arena atomically
if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
// check that the segment belongs to our sub-process
// note: this is the reason we need a lock in the case abandoned visiting is enabled.
// without the lock an abandoned visit may otherwise fail to visit all segments.
// for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the arena lock.
if (segment->subproc != subproc) {
// it is from another subprocess, re-mark it and continue searching
const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
mi_assert_internal(was_zero); MI_UNUSED(was_zero);
return NULL;
}
else {
// success, we unabandoned a segment in our sub-process
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
return segment;
}
}
// reclaim abandoned segments
// this does not set the thread id (so it appears as still abandoned)
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous )
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous, bool visit_all )
{
const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count);
if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL;
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
if (max_arena <= 0 || mi_atomic_load_relaxed(&previous->subproc->abandoned_count) == 0) return NULL;
int count = previous->count;
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
// visit arena's (from previous)
for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) {
mi_arena_id_t arena_idx = previous->start + count;
if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around
// visit arena's (from the previous cursor)
for ( ; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) {
// index wraps around
size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start);
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]);
if (arena != NULL) {
bool has_lock = false;
// visit the abandoned fields (starting at previous_idx)
for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// we only take the arena lock if there are actually abandoned segments present
if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
has_lock = (visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
if (!has_lock) {
if (visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock");
}
// skip to next arena
break;
}
}
mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned));
// visit each set bit in the field (todo: maybe use `ctz` here?)
for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
size_t mask = ((size_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
// try to reclaim it atomically
if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
mi_atomic_decrement_relaxed(&abandoned_count);
previous->bitmap_idx = bitmap_idx;
previous->count = count;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
previous->bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, previous->bitmap_idx);
if (segment != NULL) {
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
return segment;
}
}
}
}
}
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
}
}
// no more found
mi_assert(previous->start == previous->end);
previous->bitmap_idx = 0;
previous->count = 0;
previous->start = previous->end = 0;
return NULL;
}
static bool mi_arena_visit_abandoned_blocks(mi_subproc_t* subproc, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
mi_arena_field_cursor_t current;
_mi_arena_field_cursor_init(NULL, subproc, &current);
mi_segment_t* segment;
while ((segment = _mi_arena_segment_clear_abandoned_next(&current, true /* visit all */)) != NULL) {
bool ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg);
_mi_arena_segment_mark_abandoned(segment);
if (!ok) return false;
}
return true;
}
static bool mi_subproc_visit_abandoned_os_blocks(mi_subproc_t* subproc, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
if (!mi_lock_acquire(&subproc->abandoned_os_lock)) {
_mi_error_message(EFAULT, "internal error: failed to acquire abandoned (OS) segment lock");
return false;
}
bool all_visited = true;
for (mi_segment_t* segment = subproc->abandoned_os_list; segment != NULL; segment = segment->abandoned_os_next) {
if (!_mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg)) {
all_visited = false;
break;
}
}
mi_lock_release(&subproc->abandoned_os_lock);
return all_visited;
}
bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
// (unfortunately) the visit_abandoned option must be enabled from the start.
// This is to avoid taking locks if abandoned list visiting is not required (as for most programs)
if (!mi_option_is_enabled(mi_option_visit_abandoned)) {
_mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON");
return false;
}
mi_subproc_t* const subproc = _mi_subproc_from_id(subproc_id);
// visit abandoned segments in the arena's
if (!mi_arena_visit_abandoned_blocks(subproc, heap_tag, visit_blocks, visitor, arg)) return false;
// and visit abandoned segments outside arena's (in OS allocated memory)
if (!mi_subproc_visit_abandoned_os_blocks(subproc, heap_tag, visit_blocks, visitor, arg)) return false;
return true;
}
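// Usage sketch (illustrative only; `my_visitor` is hypothetical and must match `mi_block_visit_fun`):
//   size_t n = 0;
//   mi_abandoned_visit_blocks(mi_subproc_main(), 0 /* default heap tag */, true, &my_visitor, &n);
// As implemented above, arena segments are temporarily taken off the abandoned bitmap, visited,
// and re-marked as abandoned, while OS-allocated segments are walked under the sub-process
// `abandoned_os_lock`. This requires the `visit_abandoned` option to be enabled at startup.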
/* -----------------------------------------------------------
Add an arena.
----------------------------------------------------------- */
@ -907,7 +1031,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
const size_t bitmaps = (memid.is_pinned ? 3 : 5);
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
mi_memid_t meta_memid;
mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
mi_arena_t* arena = (mi_arena_t*)_mi_arena_meta_zalloc(asize, &meta_memid);
if (arena == NULL) return false;
// already zero'd due to zalloc
@ -924,7 +1048,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
arena->is_large = is_large;
arena->purge_expire = 0;
arena->search_idx = 0;
// consequetive bitmaps
mi_lock_init(&arena->abandoned_visit_lock);
// consecutive bitmaps
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap
arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandoned bitmap

View File

@ -248,7 +248,8 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
{
// the segment is abandoned, try to reclaim it into our heap
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}

View File

@ -143,6 +143,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
if (force_main) {
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
// if all memory is freed by now, all segments should be freed.
// note: this only collects in the current subprocess
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
@ -232,17 +233,22 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
heap->tld->heaps = heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap == NULL) return NULL;
// don't reclaim abandoned pages or otherwise destroy is unsafe
_mi_heap_init(heap, bheap->tld, arena_id, true /* no reclaim */, 0 /* default tag */);
mi_assert(heap_tag >= 0 && heap_tag < 256);
_mi_heap_init(heap, bheap->tld, arena_id, allow_destroy /* no reclaim? */, (uint8_t)heap_tag /* heap tag */);
return heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
return mi_heap_new_ex(0 /* default heap tag */, false /* don't allow `mi_heap_destroy` */, arena_id);
}
mi_decl_nodiscard mi_heap_t* mi_heap_new(void) {
return mi_heap_new_in_arena(_mi_arena_id_none());
// don't reclaim abandoned memory or otherwise destroy is unsafe
return mi_heap_new_ex(0 /* default heap tag */, true /* no reclaim */, _mi_arena_id_none());
}
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) {
@ -527,54 +533,95 @@ bool mi_check_owned(const void* p) {
enable visiting all blocks of all heaps across threads
----------------------------------------------------------- */
// Separate struct to keep `mi_page_t` out of the public interface
typedef struct mi_heap_area_ex_s {
mi_heap_area_t area;
mi_page_t* page;
} mi_heap_area_ex_t;
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page);
area->reserved = page->reserved * bsize;
area->committed = page->capacity * bsize;
area->blocks = mi_page_start(page);
area->used = page->used; // number of blocks in use (#553)
area->block_size = ubsize;
area->full_block_size = bsize;
}
static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
mi_assert(xarea != NULL);
if (xarea==NULL) return true;
const mi_heap_area_t* area = &xarea->area;
mi_page_t* page = xarea->page;
static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
*shift = 64 - mi_clz(divisor - 1);
*magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
}
static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) {
mi_assert_internal(n <= UINT32_MAX);
return ((((uint64_t)n * magic) >> 32) + n) >> shift;
}
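// Worked example (comment only): for a block size of 48, mi_get_fast_divisor computes
// shift = 64 - mi_clz(47) = 6 and magic = (2^32 * (64 - 48)) / 48 + 1 = 1431655766.
// Dividing an offset of 480 then becomes ((480 * magic) >> 32) + 480 = 160 + 480 = 640,
// and 640 >> 6 = 10 == 480 / 48, with no hardware division in the per-block loop below.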
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
mi_assert(area != NULL);
if (area==NULL) return true;
mi_assert(page != NULL);
if (page == NULL) return true;
_mi_page_free_collect(page,true);
_mi_page_free_collect(page,true); // collect both thread_delayed and local_free
mi_assert_internal(page->local_free == NULL);
if (page->used == 0) return true;
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding
size_t psize;
uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
size_t psize;
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
mi_heap_t* const heap = mi_page_heap(page);
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding
// optimize page with one block
if (page->capacity == 1) {
// optimize page with one block
mi_assert_internal(page->used == 1 && page->free == NULL);
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
}
mi_assert(bsize <= UINT32_MAX);
// optimize full pages
if (page->used == page->capacity) {
uint8_t* block = pstart;
for (size_t i = 0; i < page->capacity; i++) {
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
return true;
}
// create a bitmap of free blocks.
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
memset(free_map, 0, sizeof(free_map));
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS);
memset(free_map, 0, bmapsize * sizeof(intptr_t));
if (page->capacity % MI_INTPTR_BITS != 0) {
// mark left-over bits at the end as free
size_t shift = (page->capacity % MI_INTPTR_BITS);
uintptr_t mask = (UINTPTR_MAX << shift);
free_map[bmapsize - 1] = mask;
}
// fast repeated division by the block size
uint64_t magic;
size_t shift;
mi_get_fast_divisor(bsize, &magic, &shift);
#if MI_DEBUG>1
size_t free_count = 0;
#endif
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
#if MI_DEBUG>1
free_count++;
#endif
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
size_t offset = (uint8_t*)block - pstart;
mi_assert_internal(offset % bsize == 0);
size_t blockidx = offset / bsize; // Todo: avoid division?
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / sizeof(uintptr_t));
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
mi_assert_internal(offset <= UINT32_MAX);
size_t blockidx = mi_fast_divide(offset, magic, shift);
mi_assert_internal(blockidx == offset / bsize);
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / MI_INTPTR_BITS);
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
free_map[bitidx] |= ((uintptr_t)1 << bit);
}
mi_assert_internal(page->capacity == (free_count + page->used));
@ -583,42 +630,53 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
#if MI_DEBUG>1
size_t used_count = 0;
#endif
for (size_t i = 0; i < page->capacity; i++) {
size_t bitidx = (i / sizeof(uintptr_t));
size_t bit = i - (bitidx * sizeof(uintptr_t));
uintptr_t m = free_map[bitidx];
if (bit == 0 && m == UINTPTR_MAX) {
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
uint8_t* block = pstart;
for (size_t i = 0; i < bmapsize; i++) {
if (free_map[i] == 0) {
// every block is in use
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
#if MI_DEBUG>1
used_count++;
#endif
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
}
else if ((m & ((uintptr_t)1 << bit)) == 0) {
#if MI_DEBUG>1
used_count++;
#endif
uint8_t* block = pstart + (i * bsize);
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
else {
// visit the used blocks in the mask
uintptr_t m = ~free_map[i];
while (m != 0) {
#if MI_DEBUG>1
used_count++;
#endif
size_t bitidx = mi_ctz(m);
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
m &= m - 1; // clear least significant bit
}
block += bsize * MI_INTPTR_BITS;
}
}
mi_assert_internal(page->used == used_count);
return true;
}
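For reference, a hedged sketch of a caller of the public `mi_heap_visit_blocks` entry point that ends up in the routine above (the visitor and wrapper names are hypothetical):

// Sum the usable size of all live blocks in a heap (sketch).
static bool sum_block_sizes(const mi_heap_t* heap, const mi_heap_area_t* area,
                            void* block, size_t block_size, void* arg) {
  (void)heap; (void)area;
  if (block != NULL) { *(size_t*)arg += block_size; }  // block is NULL when only the area itself is reported
  return true;  // returning false stops the traversal early
}

static size_t heap_live_bytes(mi_heap_t* heap) {
  size_t total = 0;
  mi_heap_visit_blocks(heap, true /* visit individual blocks */, &sum_block_sizes, &total);
  return total;
}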
typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
// Separate struct to keep `mi_page_t` out of the public interface
typedef struct mi_heap_area_ex_s {
mi_heap_area_t area;
mi_page_t* page;
} mi_heap_area_ex_t;
typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
MI_UNUSED(heap);
MI_UNUSED(pq);
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
mi_heap_area_ex_t xarea;
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page);
xarea.page = page;
xarea.area.reserved = page->reserved * bsize;
xarea.area.committed = page->capacity * bsize;
xarea.area.blocks = mi_page_start(page);
xarea.area.used = page->used; // number of blocks in use (#553)
xarea.area.block_size = ubsize;
xarea.area.full_block_size = bsize;
_mi_heap_area_init(&xarea.area, page);
return fun(heap, &xarea, arg);
}
@ -639,7 +697,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
if (args->visit_blocks) {
return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg);
return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg);
}
else {
return true;

View File

@ -129,6 +129,8 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
MI_PAGE_QUEUES_EMPTY
};
static mi_decl_cache_align mi_subproc_t mi_subproc_default;
#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os)))
@ -136,7 +138,7 @@ mi_decl_cache_align static const mi_tld_t tld_empty = {
0,
false,
NULL, NULL,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, tld_empty_stats, tld_empty_os }, // segments
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, tld_empty_stats, tld_empty_os }, // segments
{ 0, tld_empty_stats }, // os
{ MI_STATS_NULL } // stats
};
@ -150,15 +152,15 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
extern mi_heap_t _mi_heap_main;
static mi_tld_t tld_main = {
static mi_decl_cache_align mi_tld_t tld_main = {
0, false,
&_mi_heap_main, & _mi_heap_main,
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments
{ MI_SEGMENT_SPAN_QUEUES_EMPTY, 0, 0, 0, 0, 0, &mi_subproc_default, &tld_main.stats, &tld_main.os }, // segments
{ 0, &tld_main.stats }, // os
{ MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {
mi_decl_cache_align mi_heap_t _mi_heap_main = {
&tld_main,
MI_ATOMIC_VAR_INIT(NULL),
0, // thread id
@ -191,7 +193,8 @@ static void mi_heap_main_init(void) {
#endif
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
mi_lock_init(&mi_subproc_default.abandoned_os_lock);
}
}
@ -201,6 +204,56 @@ mi_heap_t* _mi_heap_main_get(void) {
}
/* -----------------------------------------------------------
Sub process
----------------------------------------------------------- */
mi_subproc_id_t mi_subproc_main(void) {
return NULL;
}
mi_subproc_id_t mi_subproc_new(void) {
mi_memid_t memid = _mi_memid_none();
mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
if (subproc == NULL) return NULL;
subproc->memid = memid;
subproc->abandoned_os_list = NULL;
mi_lock_init(&subproc->abandoned_os_lock);
return subproc;
}
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
}
void mi_subproc_delete(mi_subproc_id_t subproc_id) {
if (subproc_id == NULL) return;
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
// check that no abandoned segments remain
bool safe_to_delete = false;
if (mi_lock_acquire(&subproc->abandoned_os_lock)) {
if (subproc->abandoned_os_list == NULL) {
safe_to_delete = true;
}
mi_lock_release(&subproc->abandoned_os_lock);
}
if (!safe_to_delete) return;
// safe to release
// todo: should we refcount subprocesses?
mi_lock_done(&subproc->abandoned_os_lock);
_mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
}
void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
mi_heap_t* heap = mi_heap_get_default();
if (heap == NULL) return;
mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
if (heap->tld->segments.subproc != &mi_subproc_default) return;
heap->tld->segments.subproc = _mi_subproc_from_id(subproc_id);
}
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
----------------------------------------------------------- */
@ -317,6 +370,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
_mi_memcpy_aligned(tld, &tld_empty, sizeof(mi_tld_t));
tld->heap_backing = bheap;
tld->heaps = NULL;
tld->segments.subproc = &mi_subproc_default;
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->os.stats = &tld->stats;

View File

@ -93,6 +93,11 @@ static mi_option_desc_t options[_mi_option_last] =
{ 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milliseconds (=400), set to 0 to disable retries.
#if defined(MI_VISIT_ABANDONED)
{ 1, INITIALIZED, MI_OPTION(visit_abandoned) }, // allow visiting heap blocks in abandoned segments; requires taking locks during reclaim.
#else
{ 0, UNINIT, MI_OPTION(visit_abandoned) },
#endif
};
static void mi_option_init(mi_option_desc_t* desc);
@ -194,7 +199,7 @@ static void mi_cdecl mi_out_stderr(const char* msg, void* arg) {
// an output function is registered it is called immediately with
// the output up to that point.
#ifndef MI_MAX_DELAY_OUTPUT
#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024))
#define MI_MAX_DELAY_OUTPUT ((size_t)(16*1024))
#endif
static char out_buf[MI_MAX_DELAY_OUTPUT+1];
static _Atomic(size_t) out_len;

View File

@ -142,7 +142,8 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st
_mi_stat_decrease(&stats->reserved, size);
}
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) {
void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats) {
if (stats == NULL) stats = &_mi_stats_main;
if (mi_memkind_is_os(memid.memkind)) {
size_t csize = _mi_os_good_alloc_size(size);
void* base = addr;
@ -156,10 +157,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
// free it
if (memid.memkind == MI_MEM_OS_HUGE) {
mi_assert(memid.is_pinned);
mi_os_free_huge_os_pages(base, csize, tld_stats);
mi_os_free_huge_os_pages(base, csize, stats);
}
else {
mi_os_prim_free(base, csize, still_committed, tld_stats);
mi_os_prim_free(base, csize, still_committed, stats);
}
}
else {
@ -168,8 +169,9 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
}
}
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) {
_mi_os_free_ex(p, size, true, memid, tld_stats);
void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) {
if (stats == NULL) stats = &_mi_stats_main;
_mi_os_free_ex(p, size, true, memid, stats);
}
@ -284,6 +286,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) {
*memid = _mi_memid_none();
if (size == 0) return NULL;
if (stats == NULL) stats = &_mi_stats_main;
size = _mi_os_good_alloc_size(size);
bool os_is_large = false;
bool os_is_zero = false;
@ -299,6 +302,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings
*memid = _mi_memid_none();
if (size == 0) return NULL;
if (stats == NULL) stats = &_mi_stats_main;
size = _mi_os_good_alloc_size(size);
alignment = _mi_align_up(alignment, _mi_os_page_size());
@ -327,6 +331,7 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offse
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (stats == NULL) stats = &_mi_stats_main;
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation

View File

@ -200,7 +200,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
// Thread init/done
//----------------------------------------------------------------
#ifdef __EMSCRIPTEN_SHARED_MEMORY__
#if defined(MI_USE_PTHREADS)
// use pthread local storage keys to detect thread ending
// (and used with MI_TLS_PTHREADS for the default heap)

View File

@ -22,7 +22,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#include <sys/mman.h> // mmap
@ -31,9 +30,9 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__linux__)
#include <features.h>
#if defined(MI_NO_THP)
#include <sys/prctl.h>
#endif
//#if defined(MI_NO_THP)
#include <sys/prctl.h> // THP disable
//#endif
#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
#else

View File

@ -9,7 +9,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#include <stdio.h> // fputs
@ -22,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
config->alloc_granularity = 16;
config->has_overcommit = false;
config->has_partial_free = false;
config->has_virtual_reserve = false;
}
@ -134,7 +133,7 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la
//---------------------------------------------
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
MI_UNUSED(addr); MI_UNUSED(size);
*is_zero = false;
return 0;
}
@ -199,9 +198,9 @@ mi_msecs_t _mi_prim_clock_now(void) {
// low resolution timer
mi_msecs_t _mi_prim_clock_now(void) {
#if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0)
return (mi_msecs_t)clock();
#elif (CLOCKS_PER_SEC < 1000)
return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
#else
return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000);
#endif

View File

@ -9,7 +9,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#include <stdio.h> // fputs, stderr
@ -231,7 +230,7 @@ static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignmen
else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) &&
(flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
win_is_out_of_memory_error(GetLastError())) {
// if committing regular memory and being out-of-memory,
// keep trying for a bit in case memory frees up after all. See issue #894
_mi_warning_message("out-of-memory on OS allocation, try again... (attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags);
long sleep_msecs = tries*40; // increasing waits
@ -316,7 +315,7 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
return 0;
}
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
*needs_recommit = true; // for safety, assume always decommitted even in the case of an error.
return (ok ? 0 : (int)GetLastError());
@ -468,7 +467,6 @@ mi_msecs_t _mi_prim_clock_now(void) {
// Process Info
//----------------------------------------------------------------
#include <windows.h>
#include <psapi.h>
static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
@ -491,7 +489,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
pinfo->utime = filetime_msecs(&ut);
pinfo->stime = filetime_msecs(&st);
// load psapi on demand
if (pGetProcessMemoryInfo == NULL) {
HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
@ -505,7 +503,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
memset(&info, 0, sizeof(info));
if (pGetProcessMemoryInfo != NULL) {
pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
}
}
pinfo->current_rss = (size_t)info.WorkingSetSize;
pinfo->peak_rss = (size_t)info.PeakWorkingSetSize;
pinfo->current_commit = (size_t)info.PagefileUsage;
@ -517,7 +515,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
// Output
//----------------------------------------------------------------
void _mi_prim_out_stderr( const char* msg )
{
// on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output.
@ -564,7 +562,6 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
}
//----------------------------------------------------------------
// Random
//----------------------------------------------------------------
@ -600,7 +597,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
}
if (pBCryptGenRandom == NULL) return false;
}
return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
#endif // MI_USE_RTLGENRANDOM
@ -636,9 +633,9 @@ void _mi_prim_thread_init_auto_done(void) {
}
void _mi_prim_thread_done_auto_done(void) {
// call thread-done on all threads (except the main thread) to prevent
// dangling callback pointer if statically linked with a DLL; Issue #208
FlsFree(mi_fls_key);
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
@ -661,3 +658,4 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
}
#endif

View File

@ -16,140 +16,111 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN
#define MI_MAX_ADDRESS ((size_t)140 << 40) // 140TB (see issue #881)
#elif (MI_INTPTR_SIZE >= 8)
#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas)
// Reduce total address space to reduce .bss (due to the `mi_segment_map`)
#if (MI_INTPTR_SIZE > 4) && MI_TRACK_ASAN
#define MI_SEGMENT_MAP_MAX_ADDRESS (128*1024ULL*MI_GiB) // 128 TiB (see issue #881)
#elif (MI_INTPTR_SIZE > 4)
#define MI_SEGMENT_MAP_MAX_ADDRESS (48*1024ULL*MI_GiB) // 48 TiB
#else
#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb
#define MI_SEGMENT_MAP_MAX_ADDRESS (UINT32_MAX)
#endif
#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
#define MI_SEGMENT_MAP_PART_SIZE (MI_INTPTR_SIZE*MI_KiB - 128) // 128 > sizeof(mi_memid_t) !
#define MI_SEGMENT_MAP_PART_BITS (8*MI_SEGMENT_MAP_PART_SIZE)
#define MI_SEGMENT_MAP_PART_ENTRIES (MI_SEGMENT_MAP_PART_SIZE / MI_INTPTR_SIZE)
#define MI_SEGMENT_MAP_PART_BIT_SPAN (MI_SEGMENT_ALIGN)
#define MI_SEGMENT_MAP_PART_SPAN (MI_SEGMENT_MAP_PART_BITS * MI_SEGMENT_MAP_PART_BIT_SPAN)
#define MI_SEGMENT_MAP_MAX_PARTS ((MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_SPAN) + 1)
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
// A part of the segment map.
typedef struct mi_segmap_part_s {
mi_memid_t memid;
_Atomic(uintptr_t) map[MI_SEGMENT_MAP_PART_ENTRIES];
} mi_segmap_part_t;
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
// Allocate parts on-demand to reduce .bss footprint
static _Atomic(mi_segmap_part_t*) mi_segment_map[MI_SEGMENT_MAP_MAX_PARTS]; // = { NULL, .. }
static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bool create_on_demand, size_t* idx, size_t* bitidx) {
// note: segment can be invalid or NULL.
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
*bitidx = 0;
return MI_SEGMENT_MAP_WSIZE;
}
else {
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE;
*bitidx = segindex % MI_INTPTR_BITS;
const size_t mapindex = segindex / MI_INTPTR_BITS;
mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE);
return mapindex;
*idx = 0;
*bitidx = 0;
if ((uintptr_t)segment >= MI_SEGMENT_MAP_MAX_ADDRESS) return NULL;
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_MAP_PART_SPAN;
if (segindex >= MI_SEGMENT_MAP_MAX_PARTS) return NULL;
mi_segmap_part_t* part = mi_atomic_load_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[segindex]);
// allocate on demand to reduce .bss footprint
if (part == NULL) {
if (!create_on_demand) return NULL;
mi_memid_t memid;
part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid, NULL);
if (part == NULL) return NULL;
mi_segmap_part_t* expected = NULL;
if (!mi_atomic_cas_ptr_strong_release(mi_segmap_part_t, &mi_segment_map[segindex], &expected, part)) {
_mi_os_free(part, sizeof(mi_segmap_part_t), memid, NULL);
part = expected;
if (part == NULL) return NULL;
}
}
mi_assert(part != NULL);
const uintptr_t offset = ((uintptr_t)segment) % MI_SEGMENT_MAP_PART_SPAN;
const uintptr_t bitofs = offset / MI_SEGMENT_MAP_PART_BIT_SPAN;
*idx = bitofs / MI_INTPTR_BITS;
*bitidx = bitofs % MI_INTPTR_BITS;
return part;
}
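
The address arithmetic above can be made concrete with a worked example; the numbers assume a 64-bit build with 64 MiB segment alignment and are illustrative only (the real values come from the constants at the top of this file), and mi_segment_map_decompose is a sketch, not part of the change:

// MI_SEGMENT_MAP_PART_SIZE  = 8*1024 - 128        = 8064 bytes  (a part plus its memid fits in 8 KiB)
// MI_SEGMENT_MAP_PART_BITS  = 8 * 8064            = 64512 bits
// MI_SEGMENT_MAP_PART_SPAN  = 64512 * 64 MiB     ~= 4 TiB of address space per part
// MI_SEGMENT_MAP_MAX_PARTS  = 48 TiB / ~4 TiB + 1 = 13 slots
// so the static array shrinks to roughly a dozen pointers in .bss, and each ~8 KiB part is only
// allocated once a segment actually lands inside its span. The decomposition mirrors the tail of
// mi_segment_map_index_of above:
static void mi_segment_map_decompose(uintptr_t addr, size_t* part_idx, size_t* word_idx, size_t* bit_idx) {
  *part_idx = addr / MI_SEGMENT_MAP_PART_SPAN;                 // which on-demand part
  const uintptr_t bitofs = (addr % MI_SEGMENT_MAP_PART_SPAN) / MI_SEGMENT_MAP_PART_BIT_SPAN;
  *word_idx = bitofs / MI_INTPTR_BITS;                         // index into part->map[]
  *bit_idx  = bitofs % MI_INTPTR_BITS;                         // bit within that word
}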
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return; // we lookup segments first in the arena's and don't need the segment map
size_t index;
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index==MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
mi_segmap_part_t* part = mi_segment_map_index_of(segment, true /* alloc map if needed */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return;
size_t index;
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE);
if (index == MI_SEGMENT_MAP_WSIZE) return;
uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* don't alloc if not present */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL
size_t index;
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* don't alloc if not present */, &index, &bitidx);
if (part == NULL) return NULL;
const uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok); MI_UNUSED(cookie_ok);
return segment; // yes, allocated by us
}
if (index==MI_SEGMENT_MAP_WSIZE) return NULL;
// TODO: maintain max/min allocated range for more efficient rejection of invalid pointers?
// search downwards for the first segment in case it is an interior pointer
// could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps through
// valid huge objects
// note: we could maintain a lowest index to speed up the path for invalid pointers?
size_t lobitidx;
size_t loindex;
uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
if (lobits != 0) {
loindex = index;
lobitidx = mi_bsr(lobits); // lobits != 0
}
else if (index == 0) {
return NULL;
}
else {
mi_assert_internal(index > 0);
uintptr_t lomask = mask;
loindex = index;
do {
loindex--;
lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]);
} while (lomask != 0 && loindex > 0);
if (lomask == 0) return NULL;
lobitidx = mi_bsr(lomask); // lomask != 0
}
mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE);
// take difference as the addresses could be larger than the MAX_ADDRESS space.
size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
segment = (mi_segment_t*)((uint8_t*)segment - diff);
if (segment == NULL) return NULL;
mi_assert_internal((void*)segment < p);
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok);
if mi_unlikely(!cookie_ok) return NULL;
if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
return segment;
return NULL;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p)));
static bool mi_is_valid_pointer(const void* p) {
// first check if it is in an arena, then check if it is OS allocated
return (_mi_arena_contains(p) || _mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
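
A small hypothetical helper built on this check (not part of the diff); since mi_is_in_heap_region is a region test rather than an exact per-block ownership check, it suits debug assertions better than dispatch decisions:

#include <mimalloc.h>
#include <assert.h>
#include <stddef.h>

static void checked_mi_free(void* p) {
  assert(p == NULL || mi_is_in_heap_region(p));  // catch frees of foreign pointers in debug builds
  mi_free(p);
}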
/*
// Return the full segment range belonging to a pointer
static void* mi_segment_range_of(const void* p, size_t* size) {
mi_segment_t* segment = _mi_segment_of(p);
if (segment == NULL) {
if (size != NULL) *size = 0;
return NULL;
}
else {
if (size != NULL) *size = segment->segment_size;
return segment;
}
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
mi_reset_delayed(tld);
mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
return page;
}
*/

View File

@ -904,8 +904,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi
segment->segment_info_slices = info_slices;
segment->thread_id = _mi_thread_id();
segment->cookie = _mi_ptr_cookie(segment);
segment->subproc = tld->subproc;
segment->slice_entries = slice_entries;
segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE);
// _mi_memzero(segment->slices, sizeof(mi_slice_t)*(info_slices+1));
_mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment));
@ -1190,6 +1191,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
// can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free.
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id());
mi_assert_internal(segment->subproc == heap->tld->segments.subproc); // only reclaim within the same subprocess
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
segment->abandoned_visits = 0;
segment->was_reclaimed = true;
@ -1213,12 +1215,13 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--;
// set the heap again and allow heap thread delayed free again.
// get the target heap for this thread which has a matching heap tag (so we reclaim into a matching heap)
mi_heap_t* target_heap = _mi_heap_by_tag(heap, page->heap_tag); // allow custom heaps to separate objects
if (target_heap == NULL) {
target_heap = heap;
_mi_error_message(EINVAL, "page with tag %u cannot be reclaimed by a heap with the same tag (using %u instead)\n", page->heap_tag, heap->tag );
_mi_error_message(EFAULT, "page with tag %u cannot be reclaimed by a heap with the same tag (using heap tag %u instead)\n", page->heap_tag, heap->tag );
}
// associate the heap with this page, and allow heap thread delayed free again.
mi_page_set_heap(page, target_heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
@ -1257,7 +1260,9 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
// attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`)
bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned
// don't reclaim more from a free than half the current segments
if (segment->subproc != heap->tld->segments.subproc) return false; // only reclaim within the same subprocess
if (!_mi_heap_memid_is_suitable(heap,segment->memid)) return false; // don't reclaim between exclusive and non-exclusive arena's
// don't reclaim more from a `free` call than half the current segments
// this is to prevent a purely freeing thread from starting to own too many segments
if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false;
if (_mi_arena_segment_clear_abandoned(segment)) { // atomically unabandon
@ -1270,17 +1275,17 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) {
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
while ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
while ((segment = _mi_arena_segment_clear_abandoned_next(&current, true /* blocking */)) != NULL) {
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
static long mi_segment_get_reclaim_tries(void) {
static long mi_segment_get_reclaim_tries(mi_segments_tld_t* tld) {
// limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries.
const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100);
if (perc <= 0) return 0;
const size_t total_count = _mi_arena_segment_abandoned_count();
const size_t total_count = mi_atomic_load_relaxed(&tld->subproc->abandoned_count);
if (total_count == 0) return 0;
const size_t relative_count = (total_count > 10000 ? (total_count / 100) * perc : (total_count * perc) / 100); // avoid overflow
long max_tries = (long)(relative_count <= 1 ? 1 : (relative_count > 1024 ? 1024 : relative_count));
@ -1291,13 +1296,14 @@ static long mi_segment_get_reclaim_tries(void) {
static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld)
{
*reclaimed = false;
long max_tries = mi_segment_get_reclaim_tries();
long max_tries = mi_segment_get_reclaim_tries(tld);
if (max_tries <= 0) return NULL;
mi_segment_t* segment;
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current, false /* non-blocking */)) != NULL))
{
mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process
segment->abandoned_visits++;
// todo: should we respect numa affinity for abandoned reclaim? perhaps only for the first visit?
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments and use many tries
@ -1335,9 +1341,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
{
mi_segment_t* segment;
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
long max_tries = (force ? (long)_mi_arena_segment_abandoned_count() : 1024); // limit latency
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL)) {
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, &current);
long max_tries = (force ? (long)mi_atomic_load_relaxed(&tld->subproc->abandoned_count) : 1024); // limit latency
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current, force /* blocking? */)) != NULL)) {
mi_segment_check_free(segment,0,0,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
@ -1518,7 +1524,38 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
}
mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid));
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
mi_assert_internal(page == NULL || _mi_page_segment(page)->subproc == tld->subproc);
return page;
}
/* -----------------------------------------------------------
Visit blocks in a segment (only used for abandoned segments)
----------------------------------------------------------- */
static bool mi_segment_visit_page(mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
mi_heap_area_t area;
_mi_heap_area_init(&area, page);
if (!visitor(NULL, &area, NULL, area.block_size, arg)) return false;
if (visit_blocks) {
return _mi_heap_area_visit_blocks(&area, page, visitor, arg);
}
else {
return true;
}
}
bool _mi_segment_visit_blocks(mi_segment_t* segment, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
while (slice < end) {
if (mi_slice_is_used(slice)) {
mi_page_t* const page = mi_slice_to_page(slice);
if (heap_tag < 0 || (int)page->heap_tag == heap_tag) {
if (!mi_segment_visit_page(page, visit_blocks, visitor, arg)) return false;
}
}
slice = slice + slice->slice_count;
}
return true;
}
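
A hedged sketch of how this visitor surfaces through the public mi_abandoned_visit_blocks API: as the code above shows, the callback first fires once per heap area with block == NULL, and then once per live block when visit_blocks is true (the stats struct and function names are illustrative):

#include <mimalloc.h>
#include <stdio.h>

typedef struct visit_stats_s { size_t areas; size_t blocks; size_t bytes; } visit_stats_t;

static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
                         void* block, size_t block_size, void* arg) {
  (void)heap; (void)area;
  visit_stats_t* stats = (visit_stats_t*)arg;
  if (block == NULL) { stats->areas++; }                   // per-area callback
  else { stats->blocks++; stats->bytes += block_size; }    // per-block callback
  return true;                                             // returning false stops the visit
}

static void report_abandoned(void) {
  visit_stats_t stats = { 0, 0, 0 };
  // heap_tag -1 visits all tags; requires mi_option_visit_abandoned to be enabled
  mi_abandoned_visit_blocks(mi_subproc_main(), -1, true /* visit_blocks */, count_blocks, &stats);
  fprintf(stderr, "abandoned: %zu areas, %zu blocks, %zu bytes\n", stats.areas, stats.blocks, stats.bytes);
}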

View File

@ -19,7 +19,7 @@
#endif
#ifdef _WIN32
#include <Windows.h>
#include <windows.h>
static void msleep(unsigned long msecs) { Sleep(msecs); }
#else
#include <unistd.h>
@ -46,7 +46,7 @@ static void test_stl_allocators();
int main() {
// mi_stats_reset(); // ignore earlier allocations
// test_std_string();
// heap_thread_free_huge();
/*

View File

@ -39,6 +39,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a
#define STRESS // undefine for leak test
#ifndef NDEBUG
#define HEAP_WALK // walk the heap objects?
#endif
static bool allow_large_objects = true; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
@ -129,6 +133,16 @@ static void free_items(void* p) {
custom_free(p);
}
#ifdef HEAP_WALK
static bool visit_blocks(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) {
(void)(heap); (void)(area);
size_t* total = (size_t*)arg;
if (block != NULL) {
*total += block_size;
}
return true;
}
#endif
static void stress(intptr_t tid) {
//bench_start_thread();
@ -173,6 +187,13 @@ static void stress(intptr_t tid) {
data[data_idx] = q;
}
}
#ifdef HEAP_WALK
// walk the heap
size_t total = 0;
mi_heap_visit_blocks(mi_heap_get_default(), true, visit_blocks, &total);
#endif
// free everything that is left
for (size_t i = 0; i < retain_top; i++) {
free_items(retained[i]);
@ -190,7 +211,11 @@ static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
static void test_stress(void) {
uintptr_t r = rand();
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &stress);
#ifdef HEAP_WALK
size_t total = 0;
mi_abandoned_visit_blocks(mi_subproc_main(), -1, true, visit_blocks, &total);
#endif
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
@ -200,7 +225,7 @@ static void test_stress(void) {
#ifndef NDEBUG
//mi_collect(false);
//mi_debug_show_arenas();
#endif
#endif
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
@ -230,9 +255,15 @@ static void test_leak(void) {
#endif
int main(int argc, char** argv) {
#ifdef HEAP_WALK
mi_option_enable(mi_option_visit_abandoned);
#endif
#ifndef NDEBUG
mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
#endif
#ifndef USE_STD_MALLOC
mi_stats_reset();
#endif
#endif
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
if (argc >= 2) {
@ -291,7 +322,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress;
#ifdef _WIN32
#include <Windows.h>
#include <windows.h>
static DWORD WINAPI thread_entry(LPVOID param) {
thread_entry_fun((intptr_t)param);