diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d13f7a55..993ba754 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -194,11 +194,11 @@ bool _mi_page_is_valid(mi_page_t* page); /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ -#define UNUSED(x) (void)(x) +#define MI_UNUSED(x) (void)(x) #if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#define MI_UNUSED_RELEASE(x) #else -#define UNUSED_RELEASE(x) UNUSED(x) +#define MI_UNUSED_RELEASE(x) MI_UNUSED(x) #endif #define MI_INIT4(x) x(),x(),x(),x() @@ -454,7 +454,7 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); - uintptr_t idx = (uintptr_t)diff >> MI_SEGMENT_SLICE_SHIFT; + size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; mi_assert_internal(idx < segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data @@ -648,7 +648,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl #ifdef MI_ENCODE_FREELIST return (mi_block_t*)mi_ptr_decode(null, block->next, keys); #else - UNUSED(keys); UNUSED(null); + MI_UNUSED(keys); MI_UNUSED(null); return (mi_block_t*)block->next; #endif } @@ -657,7 +657,7 @@ static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const #ifdef MI_ENCODE_FREELIST block->next = mi_ptr_encode(null, next, keys); #else - UNUSED(keys); UNUSED(null); + MI_UNUSED(keys); MI_UNUSED(null); block->next = (mi_encoded_t)next; #endif } @@ -673,7 +673,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* } return next; #else - UNUSED(page); + MI_UNUSED(page); return mi_block_nextx(page,block,NULL); #endif } @@ -682,7 +682,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #ifdef MI_ENCODE_FREELIST mi_block_set_nextx(page,block,next, page->keys); #else - UNUSED(page); + MI_UNUSED(page); mi_block_set_nextx(page,block,next,NULL); #endif } @@ -832,7 +832,7 @@ static inline size_t _mi_os_numa_node_count(void) { #if defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include <windows.h> -static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { +static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { // Windows: works on Intel and ARM in both 32- and 64-bit return (uintptr_t)NtCurrentTeb(); } @@ -853,11 +853,11 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { #elif defined(__x86_64__) __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS #elif defined(__arm__) - void** tcb; UNUSED(ofs); + void** tcb; MI_UNUSED(ofs); __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); res = tcb[slot]; #elif defined(__aarch64__) - void** tcb; UNUSED(ofs); + void** tcb; MI_UNUSED(ofs); #if defined(__APPLE__) // M1, issue #343 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits @@ -881,11 +881,11 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { #elif defined(__x86_64__) __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS #elif defined(__arm__) -
void** tcb; UNUSED(ofs); + void** tcb; MI_UNUSED(ofs); __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); tcb[slot] = value; #elif defined(__aarch64__) - void** tcb; UNUSED(ofs); + void** tcb; MI_UNUSED(ofs); #if defined(__APPLE__) // M1, issue #343 __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits @@ -896,7 +896,7 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { #endif } -static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { +static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { #if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__)) // on Android, slot 1 is the thread ID (pointer to pthread internal struct) return (uintptr_t)mi_tls_slot(1); @@ -907,7 +907,7 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { } #else // otherwise use standard C -static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { +static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return (uintptr_t)&_mi_heap_default; } #endif diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 8d1e5149..24cffe6d 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -17,7 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif // Minimal alignment necessary. On most platforms 16 bytes are needed -// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` +// due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) #endif @@ -67,6 +67,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ENCODE_FREELIST 1 #endif + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ @@ -83,20 +84,43 @@ terms of the MIT license. A copy of the license can be found in the file // or otherwise one might define an intptr_t type that is larger than a pointer... 
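Before the detection ladder below, it can help to see the idea in isolation: the pointer width and the `size_t` width are detected independently, since they need not agree (CHERI, x32). A minimal standalone sketch, using hypothetical `DEMO_` names that mirror the `MI_` macros which follow:

```c
#include <stdint.h>
#include <stddef.h>

/* mirror of the detection ladder below: pointer width and size_t width
   are derived separately because they can differ (e.g. CHERI, x32) */
#if INTPTR_MAX > INT64_MAX
# define DEMO_INTPTR_SHIFT (4)   /* 128-bit pointers */
#elif INTPTR_MAX == INT64_MAX
# define DEMO_INTPTR_SHIFT (3)
#else
# define DEMO_INTPTR_SHIFT (2)
#endif

#if SIZE_MAX == UINT64_MAX
# define DEMO_SIZE_SHIFT (3)
#else
# define DEMO_SIZE_SHIFT (2)
#endif

_Static_assert(sizeof(void*)  == (1 << DEMO_INTPTR_SHIFT), "pointer size matches its shift");
_Static_assert(sizeof(size_t) == (1 << DEMO_SIZE_SHIFT),  "size_t size matches its shift");

int main(void) { return 0; }
```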
// ------------------------------------------------------ -#if INTPTR_MAX == 9223372036854775807LL +#if INTPTR_MAX > INT64_MAX +# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) +#elif INTPTR_MAX == INT64_MAX # define MI_INTPTR_SHIFT (3) -#elif INTPTR_MAX == 2147483647LL +#elif INTPTR_MAX == INT32_MAX # define MI_INTPTR_SHIFT (2) #else -#error platform must be 32 or 64 bits +#error platform pointers must be 32, 64, or 128 bits +#endif + +#if SIZE_MAX == UINT64_MAX +# define MI_SIZE_SHIFT (3) +typedef int64_t mi_ssize_t; +#elif SIZE_MAX == UINT32_MAX +# define MI_SIZE_SHIFT (2) +typedef int32_t mi_ssize_t; +#else +#error platform objects must be 32 or 64 bits +#endif + +#if (SIZE_MAX/2) > LONG_MAX +# define MI_ZU(x) x##ULL +# define MI_ZI(x) x##LL +#else +# define MI_ZU(x) x##UL +# define MI_ZI(x) x##L #endif #define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT) #define MI_INTPTR_BITS (MI_INTPTR_SIZE*8) +#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT) +#define MI_SIZE_BITS (MI_SIZE_SIZE*8) -#define KiB ((size_t)1024) -#define MiB (KiB*KiB) -#define GiB (MiB*KiB) +#define MI_KiB (MI_ZU(1024)) +#define MI_MiB (MI_KiB*MI_KiB) +#define MI_GiB (MI_MiB*MI_KiB) diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ ... @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { - UNUSED(zone); + MI_UNUSED(zone); stats->blocks_in_use = 0; stats->size_in_use = 0; @@ -171,7 +171,7 @@ static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { } static boolean_t intro_zone_locked(malloc_zone_t* zone) { - UNUSED(zone); + MI_UNUSED(zone); return false; } @@ -261,7 +261,7 @@ mi_decl_externc void _malloc_fork_child(void); static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { - UNUSED(size); UNUSED(flags); + MI_UNUSED(size); MI_UNUSED(flags); return mi_get_default_zone(); } @@ -274,12 +274,12 @@ static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { } static void mi_malloc_destroy_zone(malloc_zone_t* zone) { - UNUSED(zone); + MI_UNUSED(zone); // nothing. } static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { - UNUSED(task); UNUSED(mr); + MI_UNUSED(task); MI_UNUSED(mr); if (addresses != NULL) *addresses = NULL; if (count != NULL) *count = 0; return KERN_SUCCESS; @@ -290,11 +290,11 @@ static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { } static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { - UNUSED(zone); UNUSED(name); + MI_UNUSED(zone); MI_UNUSED(name); } static int mi_malloc_jumpstart(uintptr_t cookie) { - UNUSED(cookie); + MI_UNUSED(cookie); return 1; // or 0 for no error? } @@ -309,37 +309,37 @@ static void mi__malloc_fork_child(void) { } static void mi_malloc_printf(const char* fmt, ...)
{ - UNUSED(fmt); + MI_UNUSED(fmt); } static bool zone_check(malloc_zone_t* zone) { - UNUSED(zone); + MI_UNUSED(zone); return true; } static malloc_zone_t* zone_from_ptr(const void* p) { - UNUSED(p); + MI_UNUSED(p); return mi_get_default_zone(); } static void zone_log(malloc_zone_t* zone, void* p) { - UNUSED(zone); UNUSED(p); + MI_UNUSED(zone); MI_UNUSED(p); } static void zone_print(malloc_zone_t* zone, bool b) { - UNUSED(zone); UNUSED(b); + MI_UNUSED(zone); MI_UNUSED(b); } static void zone_print_ptr_info(void* p) { - UNUSED(p); + MI_UNUSED(p); } static void zone_register(malloc_zone_t* zone) { - UNUSED(zone); + MI_UNUSED(zone); } static void zone_unregister(malloc_zone_t* zone) { - UNUSED(zone); + MI_UNUSED(zone); } // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` diff --git a/src/alloc-override.c b/src/alloc-override.c index fa04b460..42fecbb3 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -147,8 +147,8 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) - void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } - void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } + void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) @@ -187,21 +187,21 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; #if (MI_INTPTR_SIZE==8) void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit - void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) - void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } - void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #elif (MI_INTPTR_SIZE==4) void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 32-bit void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 32-bit - void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return
mi_new_nothrow(n); } + void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) - void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } - void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #else #error "define overloads for new/delete for this platform (just for performance, can be skipped)" #endif diff --git a/src/alloc.c b/src/alloc.c index ecff9fe8..59b226c1 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -123,7 +123,7 @@ extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole usable block size to zero, not just the requested size, // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - UNUSED(size); + MI_UNUSED(size); mi_assert_internal(p != NULL); mi_assert_internal(mi_usable_size(p) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); @@ -205,8 +205,8 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block } #else static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - UNUSED(page); - UNUSED(block); + MI_UNUSED(page); + MI_UNUSED(block); return false; } #endif @@ -278,19 +278,19 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co } #else static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - UNUSED(page); - UNUSED(block); + MI_UNUSED(page); + MI_UNUSED(block); } static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - UNUSED(block); + MI_UNUSED(block); return mi_page_usable_block_size(page); } static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - UNUSED(page); - UNUSED(block); - UNUSED(min_size); + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); } #endif @@ -298,7 +298,7 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { #if (MI_STAT < 2) - UNUSED(block); + MI_UNUSED(block); #endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); @@ -315,7 +315,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { } #else static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { - UNUSED(page); UNUSED(block); + MI_UNUSED(page); MI_UNUSED(block); } #endif @@ -333,7 +333,7 @@ static void mi_stat_huge_free(const mi_page_t* page) { } #else static void mi_stat_huge_free(const mi_page_t* page) { - UNUSED(page); + MI_UNUSED(page); } #endif @@ -447,7 +447,7 @@ static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool l // (and secure mode) if this was a valid pointer. 
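As an aside on the rename that dominates this patch: prefixing the macros avoids collisions with an application's own `UNUSED`, and the `_RELEASE` variant exists because some parameters are read only by debug asserts. A self-contained sketch of the pattern, with a hypothetical `demo_free_size` standing in for functions such as `mi_free_size`:

```c
#include <assert.h>
#include <stdio.h>

#define MI_UNUSED(x) (void)(x)
#if (MI_DEBUG > 0)
#define MI_UNUSED_RELEASE(x)               /* parameter IS read by asserts in debug builds */
#else
#define MI_UNUSED_RELEASE(x) MI_UNUSED(x)  /* silence -Wunused-parameter in release builds */
#endif

static void demo_free_size(void* p, size_t size) {
  MI_UNUSED_RELEASE(size);          /* only the debug assert reads `size` */
  assert(p == NULL || size > 0);    /* stand-in for the usable-size check in mi_free_size */
  printf("free %p (%zu bytes)\n", p, size);
}

int main(void) { demo_free_size(NULL, 0); return 0; }
```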
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) { - UNUSED(msg); + MI_UNUSED(msg); #if (MI_DEBUG>0) if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); @@ -482,7 +482,7 @@ void mi_free(void* p) mi_attr_noexcept const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); if (mi_unlikely(segment == NULL)) return; - const uintptr_t tid = _mi_thread_id(); + const mi_threadid_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks @@ -574,19 +574,19 @@ void* _mi_externs[] = { // ------------------------------------------------------ void mi_free_size(void* p, size_t size) mi_attr_noexcept { - UNUSED_RELEASE(size); + MI_UNUSED_RELEASE(size); mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); mi_free(p); } void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { - UNUSED_RELEASE(alignment); + MI_UNUSED_RELEASE(alignment); mi_assert(((uintptr_t)p % alignment) == 0); mi_free_size(p,size); } void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { - UNUSED_RELEASE(alignment); + MI_UNUSED_RELEASE(alignment); mi_assert(((uintptr_t)p % alignment) == 0); mi_free(p); } diff --git a/src/arena.c b/src/arena.c index 3072dbae..ac57ad8e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -151,7 +151,7 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - UNUSED_RELEASE(alignment); + MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); const size_t bcount = mi_block_count_of_size(size); @@ -437,7 +437,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t } int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - UNUSED(max_secs); + MI_UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); diff --git a/src/bitmap.c b/src/bitmap.c index 50fd4742..6b0183b0 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -121,7 +121,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); @@ -134,7 +134,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - 
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); @@ -146,7 +146,7 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); if (any_ones != NULL) *any_ones = ((field & mask) != 0); return ((field & mask) == mask); @@ -280,7 +280,7 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm // Helper for masks across fields; returns the mid count, post_mask may be 0 static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, uintptr_t* pre_mask, uintptr_t* mid_mask, uintptr_t* post_mask) { - UNUSED_RELEASE(bitmap_fields); + MI_UNUSED_RELEASE(bitmap_fields); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { *pre_mask = mi_bitmap_mask_(count, bitidx); diff --git a/src/heap.c b/src/heap.c index 1831fce8..d7975b0b 100644 --- a/src/heap.c +++ b/src/heap.c @@ -50,9 +50,9 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void #if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - UNUSED(arg1); - UNUSED(arg2); - UNUSED(pq); + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(pq); mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); @@ -86,8 +86,8 @@ typedef enum mi_collect_e { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { - UNUSED(arg2); - UNUSED(heap); + MI_UNUSED(arg2); + MI_UNUSED(heap); mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); @@ -104,10 +104,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t } static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - UNUSED(arg1); - UNUSED(arg2); - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(heap); + MI_UNUSED(pq); _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } @@ -262,10 +262,10 @@ static void mi_heap_free(mi_heap_t* heap) { ----------------------------------------------------------- */ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - UNUSED(arg1); - UNUSED(arg2); - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(heap); + MI_UNUSED(pq); // ensure no more thread_delayed_free will be added _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); @@ -422,8 +422,8 @@ bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { static bool mi_heap_page_check_owned(mi_heap_t* heap, 
mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) { - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(heap); + MI_UNUSED(pq); bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); @@ -521,8 +521,8 @@ typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(heap); + MI_UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; const size_t bsize = mi_page_block_size(page); diff --git a/src/init.c b/src/init.c index a7e84bf1..71e7e3e6 100644 --- a/src/init.c +++ b/src/init.c @@ -477,7 +477,7 @@ static void mi_process_load(void) { mi_heap_main_init(); #if defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; - UNUSED(dummy); + MI_UNUSED(dummy); #endif os_preloading = false; atexit(&mi_process_done); @@ -536,7 +536,7 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_os_memory)) { long ksize = mi_option_get(mi_option_reserve_os_memory); if (ksize > 0) { - mi_reserve_os_memory((size_t)ksize*KiB, true /* commit? */, true /* allow large pages? */); + mi_reserve_os_memory((size_t)ksize*MI_KiB, true /* commit? */, true /* allow large pages? */); } } } @@ -575,8 +575,8 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { - UNUSED(reserved); - UNUSED(inst); + MI_UNUSED(reserved); + MI_UNUSED(inst); if (reason==DLL_PROCESS_ATTACH) { mi_process_load(); } diff --git a/src/options.c b/src/options.c index 261dcc14..eb60cc90 100644 --- a/src/options.c +++ b/src/options.c @@ -106,7 +106,7 @@ void _mi_options_init(void) { mi_add_stderr_output(); // now it is safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; - long l = mi_option_get(option); UNUSED(l); // initialize + long l = mi_option_get(option); MI_UNUSED(l); // initialize if (option != mi_option_verbose) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); @@ -164,7 +164,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg, void* arg) { - UNUSED(arg); + MI_UNUSED(arg); #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output.
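The `mi_reserve_os_memory` call above multiplies by `MI_KiB` because the option value is stored in KiB units: a plain 32-bit `long` counted in bytes would overflow for reservations of 2GiB or more. A hedged sketch of the `'K'`/`'M'`/`'G'` suffix handling that appears further down in `options.c` (hypothetical `demo_` name):

```c
#include <stdio.h>
#include <stdlib.h>

/* sketch of parsing "MIMALLOC_RESERVE_OS_MEMORY=1G"-style values into KiB,
   mirroring the suffix handling in mi_option_init below */
static long demo_parse_kib(const char* s) {
  char* end = NULL;
  long value = strtol(s, &end, 10);
  if (*end == 'K') { end++; }                                /* already KiB   */
  else if (*end == 'M') { value *= 1024L; end++; }           /* MiB -> KiB    */
  else if (*end == 'G') { value *= 1024L * 1024L; end++; }   /* GiB -> KiB    */
  else { value = (value + 1023) / 1024; }                    /* bytes -> KiB, rounded up */
  if (*end == 'B') { end++; }                                /* allow KB/MB/GB */
  return (*end == 0 ? value : -1);
}

int main(void) {
  printf("%ld %ld %ld\n", demo_parse_kib("1G"), demo_parse_kib("64M"), demo_parse_kib("4096"));
  return 0;
}
```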
@@ -185,7 +185,7 @@ static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; static void mi_out_buf(const char* msg, void* arg) { - UNUSED(arg); + MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); @@ -356,7 +356,7 @@ static mi_error_fun* volatile mi_error_handler; // = NULL static _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { - UNUSED(err); + MI_UNUSED(err); #if (MI_DEBUG>0) if (err==EFAULT) { #ifdef _MSC_VER @@ -414,9 +414,9 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #ifdef MI_NO_GETENV static bool mi_getenv(const char* name, char* result, size_t result_size) { - UNUSED(name); - UNUSED(result); - UNUSED(result_size); + MI_UNUSED(name); + MI_UNUSED(result); + MI_UNUSED(result_size); return false; } #else @@ -524,9 +524,9 @@ static void mi_option_init(mi_option_desc_t* desc) { if (desc->option == mi_option_reserve_os_memory) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } - else if (*end == 'M') { value *= KiB; end++; } - else if (*end == 'G') { value *= MiB; end++; } - else { value = (value + KiB - 1) / KiB; } + else if (*end == 'M') { value *= MI_KiB; end++; } + else if (*end == 'G') { value *= MI_MiB; end++; } + else { value = (value + MI_KiB - 1) / MI_KiB; } if (*end == 'B') { end++; } } if (*end == 0) { diff --git a/src/os.c b/src/os.c index bed61182..772f0b52 100644 --- a/src/os.c +++ b/src/os.c @@ -111,11 +111,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; + if (size < 512*MI_KiB) align_size = _mi_os_page_size(); + else if (size < 2*MI_MiB) align_size = 64*MI_KiB; + else if (size < 8*MI_MiB) align_size = 256*MI_KiB; + else if (size < 32*MI_MiB) align_size = 1*MI_MiB; + else align_size = 4*MI_MiB; if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -252,7 +252,7 @@ void _mi_os_init() { os_page_size = (size_t)result; os_alloc_granularity = os_page_size; } - large_os_page_size = 2*MiB; // TODO: can we query the OS for this? + large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this? 
os_detect_overcommit(); } #endif @@ -406,7 +406,7 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #else #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - UNUSED(try_alignment); + MI_UNUSED(try_alignment); #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); @@ -497,7 +497,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif #ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; - if ((size % GiB) == 0 && mi_huge_pages_available) { + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else @@ -585,7 +585,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - if (size > 1*GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096. + if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096. #if (MI_SECURE>0) size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. #endif @@ -608,7 +608,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) // no need for mi_os_get_aligned_hint #else static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { - UNUSED(try_alignment); UNUSED(size); + MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } #endif @@ -735,7 +735,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, ----------------------------------------------------------- */ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); @@ -744,7 +744,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { } void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; if (size == 0 || p == NULL) return; size = _mi_os_good_alloc_size(size); @@ -757,7 +757,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); @@ -808,7 +808,7 @@ static void mi_mprotect_hint(int err) { " > sudo sysctl -w vm.max_map_count=262144\n"); } #else - UNUSED(err); + MI_UNUSED(err); #endif } @@ -883,13 +883,13 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; bool is_zero; return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); @@ -958,7 +958,7 @@ static 
bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; if (mi_option_is_enabled(mi_option_reset_decommits)) { return _mi_os_decommit(addr, size, stats); @@ -969,7 +969,7 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - UNUSED(tld_stats); + MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; if (mi_option_is_enabled(mi_option_reset_decommits)) { return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) @@ -1045,12 +1045,12 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { Support for allocating huge OS pages (1GiB) that are reserved up-front and possibly associated with a specific NUMA node. (use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE (GiB) +#define MI_HUGE_OS_PAGE_SIZE (MI_GiB) #if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size%MI_GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -1091,7 +1091,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } #else - UNUSED(numa_node); + MI_UNUSED(numa_node); #endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); @@ -1108,12 +1108,12 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #else static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags); + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); return 0; } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size%MI_GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; @@ -1131,7 +1131,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #else static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - UNUSED(addr); UNUSED(size); UNUSED(numa_node); + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); return NULL; } #endif @@ -1167,7 +1167,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { } #else static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { - UNUSED(pages); + MI_UNUSED(pages); if (total_size != NULL) *total_size = 0; return NULL; } #endif @@ -1368,7 +1368,7 @@ size_t _mi_os_numa_node_count_get(void) { } int _mi_os_numa_node_get(mi_os_tld_t* tld) { - UNUSED(tld); + MI_UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa
node systems: always node 0 // never more than the node count and >= 0 diff --git a/src/page.c b/src/page.c index 96d581cf..f33ca033 100644 --- a/src/page.c +++ b/src/page.c @@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file // Index a block in a page static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { - UNUSED(page); + MI_UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); @@ -472,7 +472,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { #define MI_MIN_SLICES (2) static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { - UNUSED(stats); + MI_UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); @@ -530,7 +530,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { - UNUSED(stats); + MI_UNUSED(stats); #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); @@ -571,7 +571,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { - UNUSED(tld); + MI_UNUSED(tld); mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); diff --git a/src/region.c b/src/region.c new file mode 100644 index 00000000..d99b74af --- /dev/null +++ b/src/region.c @@ -0,0 +1,505 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2020, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) +and the segment and huge object allocation by mimalloc. There may be multiple +implementations of this (one could be the identity going directly to the OS, +another could be a simple cache etc), but the current one uses large "regions". +In contrast to the rest of mimalloc, the "regions" are shared between threads and +need to be accessed using atomic operations. +We need this memory layer between the raw OS calls because of: +1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order + to reuse memory effectively. +2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of + an OS allocation/free is still (much) too expensive relative to the accesses + in that object :-( (`malloc-large` tests this). This means we need a cheaper + way to reuse memory. +3. This layer allows for NUMA aware allocation. + +Possible issues: +- (2) can potentially be addressed too with a small cache per thread which is much + simpler. 
Generally though that requires shrinking of huge pages, and may overuse + memory per thread. (and is not compatible with `sbrk`). +- Since the current regions are per-process, we need atomic operations to + claim blocks which may be contended +- In the worst case, we need to search the whole region map (16KiB for 256GiB) + linearly. At what point will direct OS calls be faster? Is there a way to + do this better without adding too much complexity? +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset + +#include "bitmap.h" + +// Internal raw OS interface +size_t _mi_os_large_page_size(void); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); + +// arena.c +void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + + + +// Constants +#if (MI_INTPTR_SIZE==8) +#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map +#elif (MI_INTPTR_SIZE==4) +#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map +#else +#error "define the maximum heap space allowed for regions on this platform" +#endif + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE + +#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) +#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB +#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) + +// Region info +typedef union mi_region_info_u { + uintptr_t value; + struct { + bool valid; // initialized? + bool is_large:1; // allocated in fixed large/huge OS pages + bool is_pinned:1; // pinned memory cannot be decommitted + short numa_node; // the associated NUMA node (where -1 means no associated node) + } x; +} mi_region_info_t; + + +// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with +// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
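To make the geometry described above concrete, a back-of-the-envelope check (assuming a 64-bit platform, so one bitmap field holds 64 bits; the `DEMO_` names are hypothetical):

```c
#include <assert.h>
#include <stdint.h>

#define DEMO_SEGMENT_SIZE ((uint64_t)4 << 20)              /* 4MiB per block   */
#define DEMO_FIELD_BITS   64                               /* one bitmap field */
#define DEMO_REGION_SIZE  (DEMO_SEGMENT_SIZE * DEMO_FIELD_BITS)
#define DEMO_MAX_HEAP     ((uint64_t)256 << 30)            /* 256GiB maximum   */

int main(void) {
  assert(DEMO_REGION_SIZE == ((uint64_t)256 << 20));       /* region = 256MiB  */
  assert(DEMO_MAX_HEAP / DEMO_REGION_SIZE == 1024);        /* MI_REGION_MAX    */
  return 0;
}
```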
+typedef struct mem_region_s { + _Atomic(uintptr_t) info; // mi_region_info_t.value + _Atomic(void*) start; // start of the memory area + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block + mi_bitmap_field_t reset; // track if reset per block + _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena + uintptr_t padding; // round to 8 fields +} mem_region_t; + +// The region map +static mem_region_t regions[MI_REGION_MAX]; + +// Allocated regions +static _Atomic(uintptr_t) regions_count; // = 0; + + +/* ---------------------------------------------------------------------------- +Utility functions +-----------------------------------------------------------------------------*/ + +// Blocks (of 4MiB) needed for the given size. +static size_t mi_region_block_count(size_t size) { + return _mi_divide_up(size, MI_SEGMENT_SIZE); +} + +/* +// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. +static size_t mi_good_commit_size(size_t size) { + if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; + return _mi_align_up(size, _mi_os_large_page_size()); +} +*/ + +// Return whether a pointer points into a region reserved by us. +bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + if (p==NULL) return false; + size_t count = mi_atomic_load_relaxed(&regions_count); + for (size_t i = 0; i < count; i++) { + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, &regions[i].start); + if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; + } + return false; +} + + +static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); + mi_assert_internal(start != NULL); + return (start + (bit_idx * MI_SEGMENT_SIZE)); +} + +static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { + mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); + size_t idx = region - regions; + mi_assert_internal(&regions[idx] == region); + return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; +} + +static size_t mi_memid_create_from_arena(size_t arena_memid) { + return (arena_memid << 1) | 1; +} + + +static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { + if ((id&1)==1) { + if (arena_memid != NULL) *arena_memid = (id>>1); + return true; + } + else { + size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; + *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; + *region = &regions[idx]; + return false; + } +} + + +/* ---------------------------------------------------------------------------- + Allocate a fresh region from the OS (or an arena) +-----------------------------------------------------------------------------*/ + +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +{ + // not out of regions yet?
+ if (mi_atomic_load_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false; + + // try to allocate a fresh region from the OS + bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); + bool region_large = (commit && allow_large); + bool is_zero = false; + bool is_pinned = false; + size_t arena_memid = 0; + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_pinned, &is_zero, &arena_memid, tld); + if (start == NULL) return false; + mi_assert_internal(!(region_large && !allow_large)); + mi_assert_internal(!region_large || region_commit); + + // claim a fresh slot + const uintptr_t idx = mi_atomic_increment_acq_rel(&regions_count); + if (idx >= MI_REGION_MAX) { + mi_atomic_decrement_acq_rel(&regions_count); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); + _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); + return false; + } + + // allocated, initialize and claim the initial blocks + mem_region_t* r = &regions[idx]; + r->arena_memid = arena_memid; + mi_atomic_store_release(&r->in_use, (uintptr_t)0); + mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); + mi_atomic_store_release(&r->reset, (uintptr_t)0); + *bit_idx = 0; + _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_store_ptr_release(void,&r->start, start); + + // and share it + mi_region_info_t info; + info.value = 0; // initialize the full union to zero + info.x.valid = true; + info.x.is_large = region_large; + info.x.is_pinned = is_pinned; + info.x.numa_node = (short)_mi_os_numa_node(tld); + mi_atomic_store_release(&r->info, info.value); // now make it available to others + *region = r; + return true; +} + +/* ---------------------------------------------------------------------------- + Try to claim blocks in suitable regions +-----------------------------------------------------------------------------*/ + +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { + // initialized at all? + mi_region_info_t info; + info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); + if (info.value==0) return false; + + // numa correct + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = info.x.numa_node; + if (rnode >= 0 && rnode != numa_node) return false; + } + + // check allow-large + if (!allow_large && info.x.is_large) return false; + + return true; +} + + +static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +{ + // try all regions for a free slot + const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
Starting at 0 seems to increase latency though + for (size_t visited = 0; visited < count; visited++, idx++) { + if (idx >= count) idx = 0; // wrap around + mem_region_t* r = &regions[idx]; + // if this region suits our demand (numa node matches, large OS page matches) + if (mi_region_is_suitable(r, numa_node, allow_large)) { + // then try to atomically claim a segment(s) in this region + if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { + tld->region_idx = idx; // remember the last found position + *region = r; + return true; + } + } + } + return false; +} + + +static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); + mem_region_t* region; + mi_bitmap_index_t bit_idx; + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + // try to claim in existing regions + if (!mi_region_try_claim(numa_node, blocks, *large, &region, &bit_idx, tld)) { + // otherwise try to allocate a fresh region and claim in there + if (!mi_region_try_alloc_os(blocks, *commit, *large, &region, &bit_idx, tld)) { + // out of regions or memory + return NULL; + } + } + + // ------------------------------------------------ + // found a region and claimed `blocks` at `bit_idx`, initialize them now + mi_assert_internal(region != NULL); + mi_assert_internal(_mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx)); + + mi_region_info_t info; + info.value = mi_atomic_load_acquire(&region->info); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&region->start); + mi_assert_internal(!(info.x.is_large && !*large)); + mi_assert_internal(start != NULL); + + *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL); + *large = info.x.is_large; + *is_pinned = info.x.is_pinned; + *memid = mi_memid_create(region, bit_idx); + void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); + + // commit + if (*commit) { + // ensure commit + bool any_uncommitted; + _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted); + if (any_uncommitted) { + mi_assert_internal(!info.x.is_large && !info.x.is_pinned); + bool commit_zero = false; + if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { + // failed to commit!
unclaim and return + mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx); + return NULL; + } + if (commit_zero) *is_zero = true; + } + } + else { + // no need to commit, but check if already fully committed + *commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx); + } + mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx)); + + // unreset reset blocks + if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { + // some blocks are still reset + mi_assert_internal(!info.x.is_large && !info.x.is_pinned); + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); + mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); + if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + bool reset_zero = false; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } + } + mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)); + + #if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif + + // and return the allocation + mi_assert_internal(p != NULL); + return p; +} + + +/* ---------------------------------------------------------------------------- + Allocation +-----------------------------------------------------------------------------*/ + +// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. +// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = 0; + *is_zero = false; + *is_pinned = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + if (size == 0) return NULL; + size = _mi_align_up(size, _mi_os_page_size()); + + // allocate from regions if possible + void* p = NULL; + size_t arena_memid; + const size_t blocks = mi_region_block_count(size); + if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { + p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); + if (p == NULL) { + _mi_warning_message("unable to allocate from region: size %zu\n", size); + } + } + if (p == NULL) { + // and otherwise fall back to the OS + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld); + *memid = mi_memid_create_from_arena(arena_memid); + } + + if (p != NULL) { + mi_assert_internal((uintptr_t)p % alignment == 0); +#if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed +#endif + } + return p; +} + + + +/* ---------------------------------------------------------------------------- +Free +-----------------------------------------------------------------------------*/
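The free path below starts by decoding the memory id. A standalone sketch of the round trip implemented by `mi_memid_create`/`mi_memid_create_from_arena`/`mi_memid_is_arena` above (hypothetical `demo_` names; assumes 64 bits per bitmap field):

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

#define DEMO_FIELD_BITS 64   /* MI_BITMAP_FIELD_BITS on a 64-bit platform */

/* low bit tags the id: 0 = region block allocation, 1 = direct arena allocation */
static size_t demo_memid_create(size_t region_idx, size_t bit_idx) {
  return ((region_idx * DEMO_FIELD_BITS + bit_idx) << 1);
}
static size_t demo_memid_create_from_arena(size_t arena_memid) {
  return (arena_memid << 1) | 1;
}
static bool demo_memid_is_arena(size_t id, size_t* region_idx, size_t* bit_idx, size_t* arena_memid) {
  if ((id & 1) == 1) { *arena_memid = (id >> 1); return true; }
  *region_idx = (id >> 1) / DEMO_FIELD_BITS;
  *bit_idx    = (id >> 1) % DEMO_FIELD_BITS;
  return false;
}

int main(void) {
  size_t r = 0, b = 0, a = 0;
  assert(!demo_memid_is_arena(demo_memid_create(3, 17), &r, &b, &a) && r == 3 && b == 17);
  assert( demo_memid_is_arena(demo_memid_create_from_arena(42), &r, &b, &a) && a == 42);
  return 0;
}
```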
+// Free previously allocated memory with a given id. +void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld != NULL); + if (p==NULL) return; + if (size==0) return; + size = _mi_align_up(size, _mi_os_page_size()); + + size_t arena_memid = 0; + mi_bitmap_index_t bit_idx; + mem_region_t* region; + if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) { + // was a direct arena allocation, pass through + _mi_arena_free(p, size, arena_memid, full_commit, tld->stats); + } + else { + // allocated in a region + mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; + const size_t blocks = mi_region_block_count(size); + mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); + mi_region_info_t info; + info.value = mi_atomic_load_acquire(&region->info); + mi_assert_internal(info.value != 0); + void* blocks_start = mi_region_blocks_start(region, bit_idx); + mi_assert_internal(blocks_start == p); // not a pointer in our area? + mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? + + // committed? + if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { + _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL); + } + + if (any_reset) { + // set the is_reset bits if any pages were reset + _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL); + } + + // reset the blocks to reduce the working set. + if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) + && (mi_option_is_enabled(mi_option_eager_commit) || + mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead + { + bool any_unreset; + _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset); + if (any_unreset) { + _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) + _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); + } + } + + // and unclaim + bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx); + mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); + } +} + + +/* ---------------------------------------------------------------------------- + collection +-----------------------------------------------------------------------------*/ +void _mi_mem_collect(mi_os_tld_t* tld) { + // free every region that has no segments in use.
+ uintptr_t rcount = mi_atomic_load_relaxed(&regions_count); + for (size_t i = 0; i < rcount; i++) { + mem_region_t* region = &regions[i]; + if (mi_atomic_load_relaxed(&region->info) != 0) { + // if no segments used, try to claim the whole region + uintptr_t m = mi_atomic_load_relaxed(&region->in_use); + while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; + if (m == 0) { + // on success, free the whole region + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start); + size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid); + uintptr_t commit = mi_atomic_load_relaxed(&regions[i].commit); + memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning + // and release the whole region + mi_atomic_store_release(&region->info, (uintptr_t)0); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_abandoned_await_readers(); // ensure no pending reads + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); + } + } + } + } +} + + +/* ---------------------------------------------------------------------------- + Other +-----------------------------------------------------------------------------*/ + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_unreset(p, size, is_zero, tld->stats); +} + +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_commit(p, size, is_zero, tld->stats); +} + +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_decommit(p, size, tld->stats); +} + +bool _mi_mem_protect(void* p, size_t size) { + return _mi_os_protect(p, size); +} + +bool _mi_mem_unprotect(void* p, size_t size) { + return _mi_os_unprotect(p, size); +} diff --git a/src/segment-cache.c b/src/segment-cache.c index e78543be..fc34e190 100644 --- a/src/segment-cache.c +++ b/src/segment-cache.c @@ -118,7 +118,7 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld) { - UNUSED(tld); + MI_UNUSED(tld); mi_msecs_t now = _mi_clock_now(); size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start size_t purged = 0;
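Finally, the collection loop in `_mi_mem_collect` relies on a claim-by-CAS idiom: a region may only be freed after its `in_use` field is atomically swapped from 0 (empty) to all-ones, so a concurrent allocator can never claim blocks in a region that is being torn down. A minimal sketch of that idiom (hypothetical `demo_` names):

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic(uint64_t) demo_in_use;   /* stand-in for a region's in_use bitmap field */

static bool demo_try_claim_whole_region(void) {
  uint64_t m = atomic_load_explicit(&demo_in_use, memory_order_relaxed);
  while (m == 0) {
    /* swap 0 -> all-ones; on failure `m` is reloaded and rechecked */
    if (atomic_compare_exchange_weak_explicit(&demo_in_use, &m, UINT64_MAX,
                                              memory_order_release, memory_order_relaxed)) {
      return true;   /* region is now exclusively ours and can be freed */
    }
  }
  return false;      /* some blocks were (or became) in use */
}

int main(void) {
  printf("claimed empty region: %d\n", demo_try_claim_whole_region());
  return 0;
}
```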