From cd7aa0ce836c49cd7f025b8fada325d549cfa524 Mon Sep 17 00:00:00 2001
From: daan
Date: Sun, 23 Jun 2019 23:15:42 -0700
Subject: [PATCH] improve handling of huge pages in the cache

---
 include/mimalloc-internal.h |  1 +
 include/mimalloc-types.h    |  5 ++-
 src/init.c                  |  2 +-
 src/os.c                    | 28 +++++++++++-
 src/segment.c               | 89 +++++++++++++++++++++++--------------
 5 files changed, 86 insertions(+), 39 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 74f0ca36..29a1b89a 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -32,6 +32,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
 // "os.c"
 bool  _mi_os_reset(void* p, size_t size);
 void* _mi_os_alloc(size_t size, mi_stats_t* stats);
+bool  _mi_os_shrink(void* p, size_t oldsize, size_t newsize);
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
 bool  _mi_os_protect(void* addr, size_t size);
 bool  _mi_os_unprotect(void* addr, size_t size);
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 0ec5bb07..c660897e 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -370,9 +370,10 @@ typedef struct mi_segment_queue_s {
 // Segments thread local data
 typedef struct mi_segments_tld_s {
   mi_segment_queue_t small_free;  // queue of segments with free small pages
-  size_t count;                   // current number of segments
-  size_t peak;                    // peak number of segments
+  size_t current_size;            // current size of all segments
+  size_t peak_size;               // peak size of all segments
   size_t cache_count;             // number of segments in the cache
+  size_t cache_size;              // total size of all segments in the cache
   mi_segment_queue_t cache;       // (small) cache of segments for small and large pages (to avoid repeated mmap calls)
   mi_stats_t* stats;              // points to tld stats
 } mi_segments_tld_t;
diff --git a/src/init.c b/src/init.c
index 2fe449f7..6d8c21e9 100644
--- a/src/init.c
+++ b/src/init.c
@@ -90,7 +90,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
 static mi_tld_t tld_main = {
   0,
   &_mi_heap_main,
-  { { NULL, NULL }, 0, 0, 0, {NULL,NULL}, tld_main_stats },  // segments
+  { { NULL, NULL }, 0, 0, 0, 0, {NULL,NULL}, tld_main_stats },  // segments
   { 0, NULL, NULL, 0, tld_main_stats },  // os
   { MI_STATS_NULL }                      // stats
 };
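
Aside (illustration only, not part of the patch): the renamed tld fields switch the
per-thread segment accounting from segment counts to byte sizes, and tld_main gains
one extra `0` initializer for the new `cache_size`. This is what lets the cache limit
further down in this patch reason in bytes rather than in numbers of segments. A
minimal model of the old and new limit tests, with simplified names and signatures
of my own (not mimalloc's API):

    #include <stdbool.h>
    #include <stddef.h>

    #define CACHE_MAX      32   // mirrors MI_SEGMENT_CACHE_MAX
    #define CACHE_FRACTION 8    // mirrors the new MI_SEGMENT_CACHE_FRACTION

    // Old policy: cap the cache by the *number* of cached segments
    // relative to the peak number of segments in use.
    static bool cache_full_by_count(size_t cache_count, size_t peak_count) {
      return cache_count >= CACHE_MAX
          || cache_count * CACHE_FRACTION >= peak_count;
    }

    // New policy: cap the cache by the cached *bytes* relative to the peak
    // bytes in use, so one cached huge segment counts for more than a 4mb one.
    static bool cache_full_by_size(size_t cache_count, size_t cache_size, size_t peak_size) {
      return cache_count >= CACHE_MAX
          || cache_size * CACHE_FRACTION >= peak_size;
    }
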
diff --git a/src/os.c b/src/os.c
index f8eabe67..0923d740 100644
--- a/src/os.c
+++ b/src/os.c
@@ -66,9 +66,9 @@ size_t _mi_os_page_size() {
 }
 
 
-static void mi_munmap(void* addr, size_t size)
+static bool mi_munmap(void* addr, size_t size)
 {
-  if (addr == NULL || size == 0) return;
+  if (addr == NULL || size == 0) return true;
   bool err = false;
 #if defined(_WIN32)
   err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
@@ -78,6 +78,10 @@ static void mi_munmap(void* addr, size_t size)
   if (err) {
 #pragma warning(suppress:4996)
     _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size);
+    return false;
+  }
+  else {
+    return true;
   }
 }
 
@@ -193,6 +197,26 @@ bool _mi_os_unprotect(void* addr, size_t size) {
   return mi_os_protectx(addr, size, false);
 }
 
+bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize) {
+  // page align conservatively within the range
+  mi_assert_internal(oldsize > newsize && p != NULL);
+  if (oldsize < newsize || p == NULL) return false;
+  if (oldsize == newsize) return true;
+
+  // oldsize and newsize should be page aligned or we cannot shrink precisely
+  void* addr = (uint8_t*)p + newsize;
+  size_t size = 0;
+  void* start = mi_os_page_align_region(addr, oldsize - newsize, &size);
+  if (size == 0 || start != addr) return false;
+
+#ifdef _WIN32
+  // we cannot shrink on Windows
+  return false;
+#else
+  return mi_munmap(start, size);
+#endif
+}
+
 /* -----------------------------------------------------------
   OS allocation using mmap/munmap
 ----------------------------------------------------------- */
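
Aside (illustration only, not part of the patch): on POSIX systems `_mi_os_shrink`
boils down to munmapping the page-aligned tail of a mapping, while Windows cannot
release part of a VirtualAlloc reservation this way, hence the `#ifdef`. A
standalone sketch of the same idea; the function name is mine and the sizes are
assumed page-aligned, as in the patch:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    // Release the tail of an mmap'd region, keeping the first `newsize` bytes.
    // Both sizes must be multiples of the OS page size, as in _mi_os_shrink.
    static bool os_shrink_demo(void* p, size_t oldsize, size_t newsize) {
      size_t page = (size_t)sysconf(_SC_PAGESIZE);
      if (p == NULL || newsize >= oldsize) return false;
      if ((oldsize % page) != 0 || (newsize % page) != 0) return false;
      // unmap only the tail pages; the front of the mapping stays valid
      return munmap((uint8_t*)p + newsize, oldsize - newsize) == 0;
    }
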
diff --git a/src/segment.c b/src/segment.c
index c251ce92..00bcd4da 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -11,23 +11,19 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <string.h>  // memset
 #include <stdio.h>
 
+#define MI_PAGE_HUGE_ALIGN  (256*1024)
+
 /* -----------------------------------------------------------
   Segment allocation
 
   We allocate pages inside big OS allocated "segments"
-  (2mb on 64-bit). This is to avoid splitting VMA's on Linux
+  (4mb on 64-bit). This is to avoid splitting VMA's on Linux
   and reduce fragmentation on other OS's.
   Each thread owns its own segments.
 
   Currently we have:
   - small pages (64kb), 32 in one segment
-  - large pages (2mb), 1 in one segment
-  - huge blocks > RC_LARGE_SIZE_MAX (256kb) are directly allocated by the OS
-
-  It might be good to have "medium" pages too (of, say 256kb)
-  to reduce pressure on the virtual address space on 32-bit systems
-  but for now we choose the simpler implementation since this
-  will only be a problem if multiple threads allocate many
-  differently sized objects between 8kb and 2mb which is not common.
+  - large pages (4mb), 1 in one segment
+  - huge blocks > MI_LARGE_SIZE_MAX (512kb) are directly allocated by the OS
 
   In any case the memory for a segment is virtual and only committed
   on demand (i.e. we are careful to not touch the memory
@@ -188,39 +184,60 @@ all again repeatedly. (We tried a one-element cache but that
 proves to be too small for certain workloads).
 ----------------------------------------------------------- */
 
-static void mi_segments_count_add(long inc, mi_segments_tld_t* tld) {
-  if (inc>=0) mi_stat_increase(tld->stats->segments,inc);
-         else mi_stat_decrease(tld->stats->segments,-inc);
-  mi_assert_internal(inc < 0 ? tld->count >= (size_t)(-inc) : tld->count < (SIZE_MAX - inc));
-  mi_assert_internal(tld->peak >= tld->count);
-  tld->count += inc;
-  if (tld->count > tld->peak) tld->peak = tld->count;
+static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
+  if (segment_size>=0) mi_stat_increase(tld->stats->segments,1);
+                  else mi_stat_decrease(tld->stats->segments,1);
+  tld->current_size += segment_size;
+  if (tld->current_size > tld->peak_size) tld->peak_size = tld->current_size;
 }
 
-static size_t mi_segments_peak(mi_segments_tld_t* tld) {
-  return tld->peak;
-}
 
 static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
-  mi_segments_count_add(-1,tld);
+  mi_segments_track_size(-((long)segment_size),tld);
   _mi_os_free(segment, segment_size,tld->stats);
 }
 
-// The segment cache is limited to be at most 1/6 of the peak
-// number of segments in use (and no more than 32)
+// The segment cache is limited to be at most 1/8 of the peak size
+// of segments in use (and no more than 32)
 #define MI_SEGMENT_CACHE_MAX      (32)
-#define MI_SEGMENT_CACHE_FRACTION (6)
+#define MI_SEGMENT_CACHE_FRACTION (8)
 
-// Get a segment of at least `required` size.
+// Get a segment of at least `required` size.
+// If `required == MI_SEGMENT_SIZE` the `segment_size` will match exactly.
 static mi_segment_t* _mi_segment_cache_findx(mi_segments_tld_t* tld, size_t required, bool reverse) {
+  mi_assert_internal(required % _mi_os_page_size() == 0);
   mi_segment_t* segment = (reverse ? tld->cache.last : tld->cache.first);
   while (segment != NULL) {
     if (segment->segment_size >= required) {
       tld->cache_count--;
+      tld->cache_size -= segment->segment_size;
       mi_segment_queue_remove(&tld->cache, segment);
-      // TODO: unmap excess memory if larger than N%
-      return segment;
+      // exact size match?
+      if (required == 0 || segment->segment_size == required) {
+        return segment;
+      }
+      // no more than 25% waste and a huge page segment?
+      // (in that case the segment size does not need to match `required` exactly)
+      else if (required != MI_SEGMENT_SIZE && segment->segment_size - (segment->segment_size/4) <= required) {
+        return segment;
+      }
+      // otherwise try to shrink the memory to match `required` exactly
+      else {
+        if (mi_option_is_enabled(mi_option_secure)) {
+          _mi_os_unprotect(segment, segment->segment_size);
+        }
+        if (_mi_os_shrink(segment, segment->segment_size, required)) {
+          tld->current_size -= segment->segment_size;
+          tld->current_size += required;
+          segment->segment_size = required;
+          return segment;
+        }
+        else {
+          // if that all fails, we give up
+          mi_segment_os_free(segment, segment->segment_size, tld);
+          return NULL;
+        }
+      }
     }
     segment = (reverse ? segment->prev : segment->next);
   }
@@ -238,12 +255,12 @@ static mi_segment_t* mi_segment_cache_evict(mi_segments_tld_t* tld) {
 
 static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
   if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
-      tld->cache_count*MI_SEGMENT_CACHE_FRACTION < mi_segments_peak(tld)) return false;
+      tld->cache_size*MI_SEGMENT_CACHE_FRACTION < tld->peak_size) return false;
   // take the opportunity to reduce the segment cache if it is too large (now)
-  while (tld->cache_count*MI_SEGMENT_CACHE_FRACTION >= mi_segments_peak(tld) + 1) {
+  while (tld->cache_size*MI_SEGMENT_CACHE_FRACTION >= tld->peak_size + 1) {
     mi_segment_t* segment = mi_segment_cache_evict(tld);
     mi_assert_internal(segment != NULL);
-    if (segment != NULL) mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
+    if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld);
   }
   return true;
 }
@@ -263,6 +280,7 @@ static bool mi_segment_cache_insert(mi_segment_t* segment, mi_segments_tld_t* tld) {
   }
   mi_segment_queue_insert_before( &tld->cache, seg, segment );
   tld->cache_count++;
+  tld->cache_size += segment->segment_size;
   return true;
 }
 
@@ -272,7 +290,7 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
   mi_segment_t* segment;
   while ((segment = mi_segment_cache_find(tld,0)) != NULL) {
-    mi_segment_os_free(segment, MI_SEGMENT_SIZE, tld);
+    mi_segment_os_free(segment, segment->segment_size, tld);
   }
-  mi_assert_internal(tld->cache_count == 0);
+  mi_assert_internal(tld->cache_count == 0 && tld->cache_size == 0);
   mi_assert_internal(mi_segment_queue_is_empty(&tld->cache));
 }
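
Aside (illustration only, not part of the patch): the 25% rule above means a cached
segment is reused as-is for a huge-page request whenever at most a quarter of it
would go unused; e.g. a cached 8mb segment serves a 6mb request (8mb - 8mb/4 = 6mb
<= 6mb) but is shrunk (or freed) for a 5mb one. The test in isolation, under a
hypothetical name of my own:

    #include <stdbool.h>
    #include <stddef.h>

    // The reuse test from _mi_segment_cache_findx in isolation: a cached
    // segment may serve a huge-page request as-is if no more than 25% of
    // it would go unused; otherwise it is shrunk to fit (or freed).
    static bool reuse_as_is(size_t cached_size, size_t required) {
      return cached_size >= required
          && cached_size - cached_size/4 <= required;
    }
    // e.g. reuse_as_is(8<<20, 6<<20) == true, reuse_as_is(8<<20, 5<<20) == false
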
@@ -301,7 +319,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
   size_t info_size;
   size_t pre_size;
   size_t segment_size = mi_segment_size( capacity, required, &pre_size, &info_size);
-
+  mi_assert_internal(segment_size >= required);
   size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);
 
   // Allocate the segment
@@ -309,6 +327,9 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
 
   // try to get it from our caches
   segment = mi_segment_cache_find(tld,segment_size);
+  mi_assert_internal(segment == NULL ||
+                     (segment_size==MI_SEGMENT_SIZE && segment_size == segment->segment_size) ||
+                     (segment_size!=MI_SEGMENT_SIZE && segment_size <= segment->segment_size));
   if (segment != NULL && mi_option_is_enabled(mi_option_secure) && (segment->page_kind != page_kind || segment->segment_size != segment_size)) {
     _mi_os_unprotect(segment,segment->segment_size);
   }
@@ -317,7 +338,7 @@ static mi_segment_t* mi_segment_alloc( size_t required, mi_page_kind_t page_kind
   if (segment == NULL) {
     segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, os_tld);
     if (segment == NULL) return NULL;
-    mi_segments_count_add(1,tld);
+    mi_segments_track_size((long)segment_size,tld);
   }
   mi_assert_internal((uintptr_t)segment % MI_SEGMENT_SIZE == 0);
@@ -560,7 +581,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
     mi_atomic_decrement(&abandoned_count);
     segment->thread_id = _mi_thread_id();
     segment->abandoned_next = NULL;
-    mi_segments_count_add(1,tld);
+    mi_segments_track_size((long)segment->segment_size,tld);
     mi_assert_internal(segment->next == NULL && segment->prev == NULL);
     mi_assert_expensive(mi_segment_is_valid(segment));
     mi_stat_decrease(tld->stats->segments_abandoned,1);
@@ -645,7 +666,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
 
 static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
 {
-  mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
+  mi_segment_t* segment = mi_segment_alloc(_mi_align_up(size, MI_PAGE_HUGE_ALIGN), MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
   if (segment == NULL) return NULL;
   mi_assert_internal(segment->segment_size - segment->segment_info_size >= size);
   segment->used = 1;
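
Aside (illustration only, not part of the patch): rounding huge requests up to
MI_PAGE_HUGE_ALIGN (256kb) keeps the set of distinct huge segment sizes small,
which makes cache hits under the 25% rule above much more likely. For power-of-two
alignments, an align-up like mimalloc's `_mi_align_up` can be sketched as:

    #include <stddef.h>

    #define MI_PAGE_HUGE_ALIGN (256*1024)

    // Round `size` up to a multiple of `alignment` (a power of two here),
    // as the huge-page allocation path above does for its segment size.
    static size_t align_up(size_t size, size_t alignment) {
      size_t mask = alignment - 1;
      return (size + mask) & ~mask;
    }
    // e.g. align_up(1000000, MI_PAGE_HUGE_ALIGN) == 1048576 (4 * 256kb)
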