diff --git a/CMakeLists.txt b/CMakeLists.txt
index de2689a3..2bcd1ef7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,7 +50,6 @@ set(mi_sources
     src/page.c
     src/random.c
     src/segment.c
-    src/segment-cache.c
     src/segment-map.c
     src/stats.c
     src/prim/prim.c)
diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj
index 52ed5282..e2c7f71d 100644
--- a/ide/vs2022/mimalloc-override.vcxproj
+++ b/ide/vs2022/mimalloc-override.vcxproj
@@ -257,7 +257,6 @@
-    <ClCompile Include="..\..\src\segment-cache.c" />
diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index 33a719c1..2916483d 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@@ -235,7 +235,6 @@
-    <ClCompile Include="..\..\src\segment-cache.c" />
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index ab8c0d28..f4a08a09 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -120,16 +120,9 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_o
 void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
 void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
 bool  _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id);
-bool  _mi_arena_memid_is_os_allocated(mi_memid_t memid);
 void  _mi_arena_collect(bool free_arenas, bool force_decommit, mi_stats_t* stats);
 bool  _mi_arena_contains(const void* p);
 
-// "segment-cache.c"
-void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld);
-bool  _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
-void  _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
-void  _mi_segment_cache_free_all(mi_os_tld_t* tld);
-
 // "segment-map.c"
 void  _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void  _mi_segment_map_freed_at(const mi_segment_t* segment);
diff --git a/src/heap.c b/src/heap.c
index 9238812b..14c3d66c 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -163,10 +163,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
     _mi_segment_thread_collect(&heap->tld->segments);
   }
 
-  // decommit in global segment caches
-  // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment
-  _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os);
-
   // collect regions on program-exit (or shared library unload)
   if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
     _mi_arena_collect(false /* destroy arenas */, true /* force purge */, &heap->tld->stats);
diff --git a/src/init.c b/src/init.c
index 5fb1ae43..17dc2faf 100644
--- a/src/init.c
+++ b/src/init.c
@@ -632,7 +632,6 @@ static void mi_cdecl mi_process_done(void) {
   // or C-runtime termination code.
   if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
     _mi_heap_destroy_all();  // forcefully release all memory held by all heaps (of this thread only!)
-    _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os);  // release all cached segments
     _mi_arena_collect(true /* destroy (owned) arenas */, true /* purge the rest */, &_mi_heap_main_get()->tld->stats);
   }
diff --git a/src/segment-cache.c b/src/segment-cache.c
deleted file mode 100644
index 2aee27c6..00000000
--- a/src/segment-cache.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/* ----------------------------------------------------------------------------
-Copyright (c) 2020, Microsoft Research, Daan Leijen
-This is free software; you can redistribute it and/or modify it under the
-terms of the MIT license. A copy of the license can be found in the file
-"LICENSE" at the root of this distribution.
------------------------------------------------------------------------------*/
-
-/* ----------------------------------------------------------------------------
-  Implements a cache of segments to avoid expensive OS calls and to reuse
-  the commit_mask to optimize the commit/decommit calls.
-  The full memory map of all segments is also implemented here.
------------------------------------------------------------------------------*/
-#include "mimalloc.h"
-#include "mimalloc/internal.h"
-#include "mimalloc/atomic.h"
-
-#include "./bitmap.h"  // atomic bitmap
-
-// #define MI_CACHE_DISABLE 1  // define to completely disable the segment cache
-
-#define MI_CACHE_FIELDS     (16)
-#define MI_CACHE_MAX        (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS)  // 1024 on 64-bit
-
-#define BITS_SET()          MI_ATOMIC_VAR_INIT(UINTPTR_MAX)
-#define MI_CACHE_BITS_SET   MI_INIT16(BITS_SET)  // note: update if MI_CACHE_FIELDS changes
-
-typedef struct mi_cache_slot_s {
-  void*               p;
-  mi_memid_t          memid;
-  bool                is_pinned;
-  mi_commit_mask_t    commit_mask;
-  mi_commit_mask_t    purge_mask;
-  _Atomic(mi_msecs_t) expire;
-} mi_cache_slot_t;
-
-static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX];  // = 0
-
-static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };  // zero bit = available!
-static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };
-static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS];  // zero bit = free
-
-static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) {
-  mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg);
-  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
-  return _mi_arena_memid_is_suitable(slot->memid, req_arena_id);
-}
-
-mi_decl_noinline static void* mi_segment_cache_pop_ex(
-  bool all_suitable,
-  size_t size, mi_commit_mask_t* commit_mask,
-  mi_commit_mask_t* purge_mask, bool large_allowed,
-  bool* large, bool* is_pinned, bool* is_zero,
-  mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
-{
-#ifdef MI_CACHE_DISABLE
-  return NULL;
-#else
-
-  // only segment blocks
-  if (size != MI_SEGMENT_SIZE) return NULL;
-
-  // numa node determines start field
-  const int numa_node = _mi_os_numa_node(tld);
-  size_t start_field = 0;
-  if (numa_node > 0) {
-    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
-    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
-  }
-
-  // find an available slot and make it unavailable
-  mi_bitmap_index_t bitidx = 0;
-  bool claimed = false;
-  mi_arena_id_t req_arena_id = _req_arena_id;
-  mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable);  // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache?
-
-  if (large_allowed) {  // large allowed?
-    claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
-    if (claimed) *large = true;
-  }
-  if (!claimed) {
-    claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
-    if (claimed) *large = false;
-  }
-
-  if (!claimed) return NULL;
-
-  // no longer available but still in-use
-  mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx));
-  mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx));
-  mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
-
-  // found a slot
-  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
-  void* p = slot->p;
-  *memid = slot->memid;
-  *is_pinned = slot->is_pinned;
-  *is_zero = false;
-  *commit_mask = slot->commit_mask;
-  *purge_mask = slot->purge_mask;
-  slot->p = NULL;
-  mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);
-
-  // mark the slot as free again
-  _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
-  return p;
-#endif
-}
-
-
-mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* purge_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld)
-{
-  return mi_segment_cache_pop_ex(false, size, commit_mask, purge_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld);
-}
-
-static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
-{
-  if (mi_commit_mask_is_empty(cmask)) {
-    // nothing
-  }
-  else if (mi_commit_mask_is_full(cmask)) {
-    // decommit the whole in one call
-    _mi_os_decommit(p, total, stats);
-  }
-  else {
-    // decommit parts
-    mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
-    size_t part = total/MI_COMMIT_MASK_BITS;
-    size_t idx;
-    size_t count;
-    mi_commit_mask_foreach(cmask, idx, count) {
-      void* start = (uint8_t*)p + (idx*part);
-      size_t size = count*part;
-      _mi_os_decommit(start, size, stats);
-    }
-    mi_commit_mask_foreach_end()
-  }
-  mi_commit_mask_create_empty(cmask);
-}
-
-#define MI_MAX_PURGE_PER_PUSH  (4)
-
-static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld)
-{
-  MI_UNUSED(tld);
-  if (!mi_option_is_enabled(mi_option_allow_purge)) return;
-  mi_msecs_t now = _mi_clock_now();
-  size_t purged = 0;
-  const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
-  size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
-  for (size_t visited = 0; visited < max_visits; visited++,idx++) {  // visit N slots
-    if (idx >= MI_CACHE_MAX) idx = 0;  // wrap
-    mi_cache_slot_t* slot = &cache[idx];
-    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
-    if (expire != 0 && (force || now >= expire)) {  // racy read
-      // seems expired, first claim it from available
-      purged++;
-      mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
-      if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) {  // no need to check large as those cannot be decommitted anyways
-        // it was available, we claimed it (and made it unavailable)
-        mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx));
-        mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx));
-        // we can now access it safely
-        expire = mi_atomic_loadi64_acquire(&slot->expire);
-        if (expire != 0 && (force || now >= expire)) {  // safe read
-          mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
-          // still expired, decommit it
-          mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
-          mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask));
-          _mi_abandoned_await_readers();  // wait until safe to decommit
-          // decommit committed parts
-          // TODO: instead of decommit, we could also free to the OS?
-          mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
-          mi_commit_mask_create_empty(&slot->purge_mask);
-        }
-        _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx);  // make it available again for a pop
-      }
-      if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break;  // bound to no more than N purge tries per push
-    }
-  }
-}
-
-void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
-  if (force) {
-    // called on `mi_collect(true)` but not on thread termination
-    _mi_segment_cache_free_all(tld);
-  }
-  else {
-    mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld);
-  }
-}
-
-void _mi_segment_cache_free_all(mi_os_tld_t* tld) {
-  mi_commit_mask_t commit_mask;
-  mi_commit_mask_t purge_mask;
-  bool is_pinned;
-  bool is_zero;
-  bool is_large;
-  mi_memid_t memid;
-  const size_t size = MI_SEGMENT_SIZE;
-  void* p;
-  do {
-    // keep popping and freeing the memory
-    p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &purge_mask,
-                                true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld);
-    if (p != NULL) {
-      size_t csize = _mi_commit_mask_committed_size(&commit_mask, size);
-      if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); }
-      _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats);
-    }
-  } while (p != NULL);
-}
-
-mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, mi_memid_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* purge_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
-{
-#ifdef MI_CACHE_DISABLE
-  return false;
-#else
-
-  // purge expired entries
-  mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld);
-
-  // only cache normal segment blocks
-  if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;
-
-  // Also do not cache arena allocated segments that cannot be decommitted. (as arena allocation is fast)
-  // This is a common case with reserved huge OS pages.
-  //
-  // (note: we could also allow segments that are already fully decommitted but that never happens
-  //  as the first slice is always committed (for the segment metadata))
-  if (!_mi_arena_memid_is_os_allocated(memid) && is_pinned) return false;
-
-  // numa node determines start field
-  int numa_node = _mi_os_numa_node(NULL);
-  size_t start_field = 0;
-  if (numa_node > 0) {
-    start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node;
-    if (start_field >= MI_CACHE_FIELDS) start_field = 0;
-  }
-
-  // find an available slot
-  mi_bitmap_index_t bitidx;
-  bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
-  if (!claimed) return false;
-
-  mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx));
-  mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx));
-#if MI_DEBUG>1
-  if (is_pinned || is_large) {
-    mi_assert_internal(mi_commit_mask_is_full(commit_mask));
-  }
-#endif
-
-  // set the slot
-  mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
-  slot->p = start;
-  slot->memid = memid;
-  slot->is_pinned = is_pinned;
-  mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
-  slot->commit_mask = *commit_mask;
-  slot->purge_mask = *purge_mask;
-  if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_purge)) {
-    long delay = mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult);
-    if (delay == 0) {
-      _mi_abandoned_await_readers();  // wait until safe to decommit
-      mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
-      mi_commit_mask_create_empty(&slot->purge_mask);
-    }
-    else {
-      mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
-    }
-  }
-
-  // make it available
-  _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx);
-  return true;
-#endif
-}
diff --git a/src/segment.c b/src/segment.c
index af4ed95c..7d0d2c28 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -11,7 +11,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <string.h>  // memset
 #include <stdbool.h>
 
-#define MI_USE_SEGMENT_CACHE  0
 #define MI_PAGE_HUGE_ALIGN  (256*1024)
 
 static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats);
@@ -393,28 +392,11 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
   // purge delayed decommits now? (no, leave it to the arena)
   // mi_segment_try_purge(segment,true,tld->stats);
 
-  // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
   const size_t size = mi_segment_size(segment);
-#if MI_USE_SEGMENT_CACHE
-  if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE  // only push regular segments on the cache
-      || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->purge_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os))
-#endif
-  {
-    const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
-    /*
-    // if not all committed, an arena may decommit the whole area, but that double counts
-    // the already decommitted parts; adjust for that in the stats.
-    if (!mi_commit_mask_is_full(&segment->commit_mask)) {
-      const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
-      mi_assert_internal(size > csize);
-      if (size > csize) {
-        _mi_stat_increase(&_mi_stats_main.committed, size - csize);
-      }
-    }
-    */
-    _mi_abandoned_await_readers();  // wait until safe to free
-    _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats);
-  }
+  const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
+
+  _mi_abandoned_await_readers();  // wait until safe to free
+  _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, csize, tld->stats);
 }
 
 // called by threads that are terminating
@@ -819,6 +801,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment
                                           bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
 {
+  MI_UNUSED(ppurge_mask);
   mi_memid_t memid;
   bool mem_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy
   bool is_pinned = false;
@@ -837,15 +820,6 @@
   }
   const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE;
   mi_segment_t* segment = NULL;
-
-  #if MI_USE_SEGMENT_CACHE
-  // get from cache?
-  if (page_alignment == 0) {
-    segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, ppurge_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld);
-  }
-  #else
-  MI_UNUSED(ppurge_mask);
-  #endif
 
   // get from OS
   if (segment==NULL) {
diff --git a/src/static.c b/src/static.c
index 831e9ecd..bc05dd72 100644
--- a/src/static.c
+++ b/src/static.c
@@ -32,7 +32,6 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "page.c" // includes page-queue.c
 #include "random.c"
 #include "segment.c"
-#include "segment-cache.c"
 #include "segment-map.c"
 #include "stats.c"
 #include "prim/prim.c"