diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84668eb3..f8836f20 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}")
 set(mi_sources
     src/stats.c
     src/os.c
+    src/arena.c
     src/segment.c
     src/page.c
     src/alloc.c
diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj
index 35f84a4b..458d5e70 100644
--- a/ide/vs2017/mimalloc-override.vcxproj
+++ b/ide/vs2017/mimalloc-override.vcxproj
@@ -231,6 +231,7 @@
+    <ClCompile Include="..\..\src\arena.c" />
diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters
index f7ea6d4c..64bb3dbd 100644
--- a/ide/vs2017/mimalloc-override.vcxproj.filters
+++ b/ide/vs2017/mimalloc-override.vcxproj.filters
@@ -67,5 +67,8 @@
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj
index 415f87fc..219449c9 100644
--- a/ide/vs2017/mimalloc.vcxproj
+++ b/ide/vs2017/mimalloc.vcxproj
@@ -217,6 +217,7 @@
+    <ClCompile Include="..\..\src\arena.c" />
diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters
index 0e5512bc..87f7e9e1 100644
--- a/ide/vs2017/mimalloc.vcxproj.filters
+++ b/ide/vs2017/mimalloc.vcxproj.filters
@@ -50,6 +50,9 @@
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj
index 3a9cdcae..ac559468 100644
--- a/ide/vs2019/mimalloc-override.vcxproj
+++ b/ide/vs2019/mimalloc-override.vcxproj
@@ -231,6 +231,7 @@
+    <ClCompile Include="..\..\src\arena.c" />
diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
index 2af40f16..f38a7a11 100644
--- a/ide/vs2019/mimalloc.vcxproj
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -217,6 +217,7 @@
+    <ClCompile Include="..\..\src\arena.c" />
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index f4822b10..f5b11c33 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -65,6 +65,12 @@ bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
 bool  _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
 bool  _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
+int   _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
+
+// arena.c
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+void  _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
 
 /*
 // memory.c
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index b77d77d3..b043bebe 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -239,12 +239,14 @@ typedef mi_page_t  mi_slice_t;
 // the OS. Inside segments we allocated fixed size _pages_ that
 // contain blocks.
 typedef struct mi_segment_s {
-  struct mi_segment_s* next;            // the list of freed segments in the cache
-  volatile _Atomic(struct mi_segment_s*) abandoned_next;
-
+  size_t            memid;              // memory id for arena allocation
   bool              mem_is_fixed;       // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
   bool              mem_is_committed;   // `true` if the whole segment is eagerly committed
+  // from here is zero initialized
+  struct mi_segment_s* next;            // the list of freed segments in the cache
+  volatile _Atomic(struct mi_segment_s*) abandoned_next;
+
   size_t            abandoned;          // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t            used;               // count of pages in use
   uintptr_t         cookie;             // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
diff --git a/src/memory.c b/src/memory.c
deleted file mode 100644
index 80351edc..00000000
--- a/src/memory.c
+++ /dev/null
@@ -1,551 +0,0 @@
-/* ----------------------------------------------------------------------------
-Copyright (c) 2019, Microsoft Research, Daan Leijen
-This is free software; you can redistribute it and/or modify it under the
-terms of the MIT license. A copy of the license can be found in the file
-"LICENSE" at the root of this distribution.
------------------------------------------------------------------------------*/
-
-/* ----------------------------------------------------------------------------
-This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
-and the segment and huge object allocation by mimalloc. There may be multiple
-implementations of this (one could be the identity going directly to the OS,
-another could be a simple cache etc), but the current one uses large "regions".
-In contrast to the rest of mimalloc, the "regions" are shared between threads and
-need to be accessed using atomic operations.
-We need this memory layer between the raw OS calls because of:
-1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
-   to reuse memory effectively.
-2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
-   an OS allocation/free is still (much) too expensive relative to the accesses in that
-   object :-( (`malloc-large` tests this). This means we need a cheaper way to
-   reuse memory.
-3. This layer can help with a NUMA aware allocation in the future.
-
-Possible issues:
-- (2) can potentially be addressed too with a small cache per thread which is much
-  simpler. Generally though that requires shrinking of huge pages, and may overuse
-  memory per thread. (and is not compatible with `sbrk`).
-- Since the current regions are per-process, we need atomic operations to
-  claim blocks which may be contended
-- In the worst case, we need to search the whole region map (16KiB for 256GiB)
-  linearly. At what point will direct OS calls be faster? Is there a way to
-  do this better without adding too much complexity?
------------------------------------------------------------------------------*/
-#include "mimalloc.h"
-#include "mimalloc-internal.h"
-#include "mimalloc-atomic.h"
-
-#include <string.h>  // memset
-
-// Internal raw OS interface
-size_t  _mi_os_large_page_size();
-bool    _mi_os_protect(void* addr, size_t size);
-bool    _mi_os_unprotect(void* addr, size_t size);
-bool    _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
-bool    _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
-bool    _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
-bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
-void*   _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
-void    _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
-void*   _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
-bool    _mi_os_is_huge_reserved(void* p);
-
-// Constants
-#if (MI_INTPTR_SIZE==8)
-#define MI_HEAP_REGION_MAX_SIZE   (256 * (1ULL << 30))  // 256GiB => 16KiB for the region map
-#elif (MI_INTPTR_SIZE==4)
-#define MI_HEAP_REGION_MAX_SIZE   (3 * (1UL << 30))     // 3GiB => 196 bytes for the region map
-#else
-#error "define the maximum heap space allowed for regions on this platform"
-#endif
-
-#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE
-
-#define MI_REGION_MAP_BITS        (MI_INTPTR_SIZE * 8)
-#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
-#define MI_REGION_MAX_ALLOC_SIZE  ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE)  // 64MiB
-#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
-#define MI_REGION_MAP_FULL        UINTPTR_MAX
-
-
-typedef uintptr_t mi_region_info_t;
-
-static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
-  return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0));
-}
-
-static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) {
-  if (is_large) *is_large = ((info&0x02) != 0);
-  if (is_committed) *is_committed = ((info&0x01) != 0);
-  return (void*)(info & ~0x03);
-}
-
-
-// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
-// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
-typedef struct mem_region_s {
-  volatile _Atomic(uintptr_t)        map;        // in-use bit per MI_SEGMENT_SIZE block
-  volatile _Atomic(mi_region_info_t) info;       // start of virtual memory area, and flags
-  volatile _Atomic(uintptr_t)        dirty_mask; // bit per block if the contents are not zero'd
-} mem_region_t;
-
-
-// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
-// TODO: in the future, maintain a map per NUMA node for numa aware allocation
-static mem_region_t regions[MI_REGION_MAX];
-
-static volatile _Atomic(uintptr_t) regions_count; // = 0;        // allocated regions
-
-
-/* ----------------------------------------------------------------------------
-Utility functions
------------------------------------------------------------------------------*/
-
-// Blocks (of 4MiB) needed for the given size.
-static size_t mi_region_block_count(size_t size) {
-  mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
-  return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
-}
-
-// The bit mask for a given number of blocks at a specified bit index.
-static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
-  mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
-  return ((((uintptr_t)1 << blocks) - 1) << bitidx);
-}
-
-// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
-static size_t mi_good_commit_size(size_t size) {
-  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
-  return _mi_align_up(size, _mi_os_large_page_size());
-}
-
-// Return if a pointer points into a region reserved by us.
-bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
-  if (p==NULL) return false;
-  size_t count = mi_atomic_read_relaxed(&regions_count);
-  for (size_t i = 0; i < count; i++) {
-    uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(&regions[i].info), NULL, NULL);
-    if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
-  }
-  return false;
-}
-
-
-/* ----------------------------------------------------------------------------
-Commit from a region
------------------------------------------------------------------------------*/
-
-// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
-// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
-// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
-// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks,
-                                    size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
-{
-  size_t mask = mi_region_block_mask(blocks,bitidx);
-  mi_assert_internal(mask != 0);
-  mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
-  mi_assert_internal(&regions[idx] == region);
-
-  // ensure the region is reserved
-  mi_region_info_t info = mi_atomic_read(&region->info);
-  if (info == 0)
-  {
-    bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
-    bool region_large  = *allow_large;
-    void* start = NULL;
-    if (region_large) {
-      start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN);
-      if (start != NULL) { region_commit = true; }
-    }
-    if (start == NULL) {
-      start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
-    }
-    mi_assert_internal(!(region_large && !*allow_large));
-
-    if (start == NULL) {
-      // failure to allocate from the OS! unclaim the blocks and fail
-      size_t map;
-      do {
-        map = mi_atomic_read_relaxed(&region->map);
-      } while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
-      return false;
-    }
-
-    // set the newly allocated region
-    info = mi_region_info_create(start,region_large,region_commit);
-    if (mi_atomic_cas_strong(&region->info, info, 0)) {
-      // update the region count
-      mi_atomic_increment(&regions_count);
-    }
-    else {
-      // failed, another thread allocated just before us!
-      // we assign it to a later slot instead (up to 4 tries).
-      for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
-        if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
-          mi_atomic_increment(&regions_count);
-          start = NULL;
-          break;
-        }
-      }
-      if (start != NULL) {
-        // free it if we didn't succeed to save it to some other region
-        _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
-      }
-      // and continue with the memory at our index
-      info = mi_atomic_read(&region->info);
-    }
-  }
-  mi_assert_internal(info == mi_atomic_read(&region->info));
-  mi_assert_internal(info != 0);
-
-  // Commit the blocks to memory
-  bool region_is_committed = false;
-  bool region_is_large = false;
-  void* start = mi_region_info_read(info,&region_is_large,&region_is_committed);
-  mi_assert_internal(!(region_is_large && !*allow_large));
-  mi_assert_internal(start!=NULL);
-
-  // set dirty bits
-  uintptr_t m;
-  do {
-    m = mi_atomic_read(&region->dirty_mask);
-  } while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m));
-  *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range?
-
-  void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
-  if (*commit && !region_is_committed) {
-    // ensure commit
-    bool commit_zero = false;
-    _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats);  // only commit needed size (unless using large OS pages)
-    if (commit_zero) *is_zero = true;
-  }
-  else if (!*commit && region_is_committed) {
-    // but even when no commit is requested, we might have committed anyway (in a huge OS page for example)
-    *commit = true;
-  }
-
-  // and return the allocation
-  mi_assert_internal(blocks_start != NULL);
-  *allow_large = region_is_large;
-  *p  = blocks_start;
-  *id = (idx*MI_REGION_MAP_BITS) + bitidx;
-  return true;
-}
-
-// Use bit scan forward to quickly find the first zero bit if it is available
-#if defined(_MSC_VER)
-#define MI_HAVE_BITSCAN
-#include <intrin.h>
-static inline size_t mi_bsf(uintptr_t x) {
-  if (x==0) return 8*MI_INTPTR_SIZE;
-  DWORD idx;
-  #if (MI_INTPTR_SIZE==8)
-  _BitScanForward64(&idx, x);
-  #else
-  _BitScanForward(&idx, x);
-  #endif
-  return idx;
-}
-static inline size_t mi_bsr(uintptr_t x) {
-  if (x==0) return 8*MI_INTPTR_SIZE;
-  DWORD idx;
-  #if (MI_INTPTR_SIZE==8)
-  _BitScanReverse64(&idx, x);
-  #else
-  _BitScanReverse(&idx, x);
-  #endif
-  return idx;
-}
-#elif defined(__GNUC__) || defined(__clang__)
-#define MI_HAVE_BITSCAN
-static inline size_t mi_bsf(uintptr_t x) {
-  return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x));
-}
-static inline size_t mi_bsr(uintptr_t x) {
-  return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x));
-}
-#endif
-
-// Allocate `blocks` in a `region` at `idx` of a given `size`.
-// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
-// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
-// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size,
-                                   bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
-{
-  mi_assert_internal(p != NULL && id != NULL);
-  mi_assert_internal(blocks < MI_REGION_MAP_BITS);
-
-  const uintptr_t mask = mi_region_block_mask(blocks, 0);
-  const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
-  uintptr_t map = mi_atomic_read(&region->map);
-  if (map==MI_REGION_MAP_FULL) return true;
-
-  #ifdef MI_HAVE_BITSCAN
-  size_t bitidx = mi_bsf(~map);    // quickly find the first zero bit if possible
-  #else
-  size_t bitidx = 0;               // otherwise start at 0
-  #endif
-  uintptr_t m = (mask << bitidx);  // invariant: m == mask shifted by bitidx
-
-  // scan linearly for a free range of zero bits
-  while(bitidx <= bitidx_max) {
-    if ((map & m) == 0) {  // are the mask bits free at bitidx?
-      mi_assert_internal((m >> bitidx) == mask); // no overflow?
-      uintptr_t newmap = map | m;
-      mi_assert_internal((newmap^map) >> bitidx == mask);
-      if (!mi_atomic_cas_weak(&region->map, newmap, map)) {  // TODO: use strong cas here?
-        // no success, another thread claimed concurrently.. keep going
-        map = mi_atomic_read(&region->map);
-        continue;
-      }
-      else {
-        // success, we claimed the bits
-        // now commit the block memory -- this can still fail
-        return mi_region_commit_blocks(region, idx, bitidx, blocks,
-                                       size, commit, allow_large, is_zero, p, id, tld);
-      }
-    }
-    else {
-      // on to the next bit range
-      #ifdef MI_HAVE_BITSCAN
-      size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
-      mi_assert_internal(shift > 0 && shift <= blocks);
-      #else
-      size_t shift = 1;
-      #endif
-      bitidx += shift;
-      m <<= shift;
-    }
-  }
-  // no error, but also no bits found
-  return true;
-}
-
-// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
-// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
-// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
-// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
-static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size,
-                                       bool* commit, bool* allow_large, bool* is_zero,
-                                       void** p, size_t* id, mi_os_tld_t* tld)
-{
-  // check if there are available blocks in the region..
-  mi_assert_internal(idx < MI_REGION_MAX);
-  mem_region_t* region = &regions[idx];
-  uintptr_t m = mi_atomic_read_relaxed(&region->map);
-  if (m != MI_REGION_MAP_FULL) {  // some bits are zero
-    bool ok = (*commit || *allow_large); // committing or allow-large is always ok
-    if (!ok) {
-      // otherwise skip incompatible regions if possible.
-      // this is not guaranteed due to multiple threads allocating at the same time but
-      // that's ok. In secure mode, large is never allowed for any thread, so that works out;
-      // otherwise we might just not be able to reset/decommit individual pages sometimes.
-      mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
-      bool is_large;
-      bool is_committed;
-      void* start = mi_region_info_read(info,&is_large,&is_committed);
-      ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation?
-    }
-    if (ok) {
-      return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld);
-    }
-  }
-  return true;  // no error, but no success either
-}
-
-/* ----------------------------------------------------------------------------
- Allocation
------------------------------------------------------------------------------*/
-
-// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
-// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
-void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero,
-                            size_t* id, mi_os_tld_t* tld)
-{
-  mi_assert_internal(id != NULL && tld != NULL);
-  mi_assert_internal(size > 0);
-  *id = SIZE_MAX;
-  *is_zero = false;
-  bool default_large = false;
-  if (large==NULL) large = &default_large;  // ensure `large != NULL`
-
-  // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
-  if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
-    *is_zero = true;
-    return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld);  // round up size
-  }
-
-  // always round size to OS page size multiple (so commit/decommit go over the entire range)
-  // TODO: use large OS page size here?
-  size = _mi_align_up(size, _mi_os_page_size());
-
-  // calculate the number of needed blocks
-  size_t blocks = mi_region_block_count(size);
-  mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
-
-  // find a range of free blocks
-  void* p = NULL;
-  size_t count = mi_atomic_read(&regions_count);
-  size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
-  for (size_t visited = 0; visited < count; visited++, idx++) {
-    if (idx >= count) idx = 0;  // wrap around
-    if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
-    if (p != NULL) break;
-  }
-
-  if (p == NULL) {
-    // no free range in existing regions -- try to extend beyond the count.. but at most 8 regions
-    for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) {
-      if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
-      if (p != NULL) break;
-    }
-  }
-
-  if (p == NULL) {
-    // we could not find a place to allocate, fall back to the os directly
-    _mi_warning_message("unable to allocate from region: size %zu\n", size);
-    *is_zero = true;
-    p = _mi_os_alloc_aligned(size, alignment, commit, large, tld);
-  }
-  else {
-    tld->region_idx = idx;  // next start of search? currently not used as we use first-fit
-  }
-
-  mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
-  return p;
-}
-
-
-
-/* ----------------------------------------------------------------------------
-Free
------------------------------------------------------------------------------*/
-
-// Free previously allocated memory with a given id.
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
-  mi_assert_internal(size > 0 && stats != NULL);
-  if (p==NULL) return;
-  if (size==0) return;
-  if (id == SIZE_MAX) {
-    // was a direct OS allocation, pass through
-    _mi_os_free(p, size, stats);
-  }
-  else {
-    // allocated in a region
-    mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
-    // we can align the size up to page size (as we allocate that way too)
-    // this ensures we fully commit/decommit/reset
-    size = _mi_align_up(size, _mi_os_page_size());
-    size_t idx = (id / MI_REGION_MAP_BITS);
-    size_t bitidx = (id % MI_REGION_MAP_BITS);
-    size_t blocks = mi_region_block_count(size);
-    size_t mask = mi_region_block_mask(blocks, bitidx);
-    mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
-    mem_region_t* region = &regions[idx];
-    mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
-    mi_region_info_t info = mi_atomic_read(&region->info);
-    bool is_large;
-    bool is_eager_committed;
-    void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
-    mi_assert_internal(start != NULL);
-    void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
-    mi_assert_internal(blocks_start == p); // not a pointer in our area?
-    mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
-    if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`?
-
-    // decommit (or reset) the blocks to reduce the working set.
-    // TODO: implement delayed decommit/reset as these calls are too expensive
-    // if the memory is reused soon.
-    // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
-    if (!is_large) {
-      if (mi_option_is_enabled(mi_option_segment_reset)) {
-        if (!is_eager_committed &&  // cannot reset large pages
-            (mi_option_is_enabled(mi_option_eager_commit) ||  // cannot reset halfway committed segments, use `option_page_reset` instead
-             mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments
-        {
-          _mi_os_reset(p, size, stats);
-          //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits?
-        }
-      }
-<<<<<<< HEAD
-      // else { _mi_os_reset(p,size,stats); }
-    }
-=======
-    }
->>>>>>> dev
-    if (!is_eager_committed) {
-      // adjust commit statistics as we commit again when re-using the same slot
-      _mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
-    }
-
-    // TODO: should we free empty regions? currently only done _mi_mem_collect.
-    // this frees up virtual address space which might be useful on 32-bit systems?
-
-    // and unclaim
-    uintptr_t map;
-    uintptr_t newmap;
-    do {
-      map = mi_atomic_read_relaxed(&region->map);
-      newmap = map & ~mask;
-    } while (!mi_atomic_cas_weak(&region->map, newmap, map));
-  }
-}
-
-
-/* ----------------------------------------------------------------------------
-  collection
------------------------------------------------------------------------------*/
-void _mi_mem_collect(mi_stats_t* stats) {
-  // free every region that has no segments in use.
-  for (size_t i = 0; i < regions_count; i++) {
-    mem_region_t* region = &regions[i];
-    if (mi_atomic_read_relaxed(&region->map) == 0) {
-      // if no segments used, try to claim the whole region
-      uintptr_t m;
-      do {
-        m = mi_atomic_read_relaxed(&region->map);
-      } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
-      if (m == 0) {
-        // on success, free the whole region (unless it was huge reserved)
-        bool is_eager_committed;
-        void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
-        if (start != NULL && !_mi_os_is_huge_reserved(start)) {
-          _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats);
-        }
-        // and release
-        mi_atomic_write(&region->info,0);
-        mi_atomic_write(&region->map,0);
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------------
-  Other
------------------------------------------------------------------------------*/
-
-bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
-  return _mi_os_commit(p, size, is_zero, stats);
-}
-
-bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) {
-  return _mi_os_decommit(p, size, stats);
-}
-
-bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) {
-  return _mi_os_reset(p, size, stats);
-}
-
-bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
-  return _mi_os_unreset(p, size, is_zero, stats);
-}
-
-bool _mi_mem_protect(void* p, size_t size) {
-  return _mi_os_protect(p, size);
-}
-
-bool _mi_mem_unprotect(void* p, size_t size) {
-  return _mi_os_unprotect(p, size);
-}
diff --git a/src/os.c b/src/os.c
index 5e595f93..191be56c 100644
--- a/src/os.c
+++ b/src/os.c
@@ -868,13 +868,13 @@ static void mi_os_free_huge_reserved() {
 */
 
 #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
   UNUSED(pages); UNUSED(max_secs);
   if (pages_reserved != NULL) *pages_reserved = 0;
   return ENOMEM;
 }
 #else
-int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
+int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
 {
   if (pages_reserved != NULL) *pages_reserved = 0;
   if (max_secs==0) return ETIMEDOUT; // timeout
diff --git a/src/segment.c b/src/segment.c
index 7dcfcd36..8a02acac 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -284,7 +284,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
   if (MI_SECURE>0) {
     _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set
   }
-  _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
+  // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
+  _mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats);
 }
@@ -598,29 +599,35 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
   // Try to get from our cache first
   mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld);
+  bool is_zero = false;
   if (segment==NULL) {
     // Allocate the segment from the OS
     bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
-    segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld);
+    size_t memid = 0;
+    // segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld);
+    segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld);
     if (segment == NULL) return NULL;  // failed to allocate
     mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
     if (!commit) {
       // at least commit the info slices
       mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE);
-      bool is_zero = false;
       _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats);
     }
+    segment->memid = memid;
     segment->mem_is_fixed = mem_large;
     segment->mem_is_committed = commit;
     mi_segments_track_size((long)(segment_size), tld);
     mi_segment_map_allocated_at(segment);
   }
 
-  // zero the segment info? -- not needed as it is zero initialized from the OS
-  // memset(segment, 0, info_size);
+  // zero the segment info? -- not always needed as it is zero initialized from the OS
+  if (!is_zero) {
+    ptrdiff_t ofs = offsetof(mi_segment_t, next);
+    size_t prefix = offsetof(mi_segment_t, slices) - ofs;
+    memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices);
+  }
 
   // initialize segment info
-  memset(segment,0,offsetof(mi_segment_t,slices));
   segment->segment_slices = segment_slices;
   segment->segment_info_slices = info_slices;
   segment->thread_id = _mi_thread_id();
@@ -629,7 +636,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
   segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE);
   segment->allow_decommit = !commit;
   segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed
-  memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1));
+  // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1));
  _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment));
 
   // set up guard pages
diff --git a/test/test-stress.c b/test/test-stress.c
index 08406ec7..f60cda10 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -18,7 +18,7 @@ terms of the MIT license.
 
 // argument defaults
 static int THREADS = 32;      // more repeatable if THREADS <= #processors
-static int N       = 20;      // scaling factor
+static int N       = 40;      // scaling factor
 
 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
 // static int N       = 100;  // scaling factor
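
For reference, here is a minimal sketch of how the internal arena API introduced by this patch is meant to be used, mirroring the call site in src/segment.c and the declarations added to include/mimalloc-internal.h. The wrapper function and its name are hypothetical and only illustrate the alloc/free pairing through the opaque memid; they are not part of the patch.

    // Hypothetical illustration only -- not part of the patch above.
    // Uses only the declarations added to include/mimalloc-internal.h.
    #include <string.h>               // memset
    #include "mimalloc-internal.h"

    static void* example_segment_os_alloc(size_t size, mi_os_tld_t* os_tld, size_t* memid_out) {
      bool   commit  = true;   // in/out: requested commit; may be adjusted by the arena
      bool   large   = false;  // in/out: whether large OS pages are allowed / were used
      bool   is_zero = false;  // out: true if the returned memory is known to be zeroed
      size_t memid   = 0;      // out: opaque id that must be passed back on free
      void* p = _mi_arena_alloc_aligned(size, MI_SEGMENT_SIZE,
                                        &commit, &large, &is_zero, &memid, os_tld);
      if (p == NULL) return NULL;
      if (!is_zero) {
        // unlike the old region layer, the memory is not guaranteed to be zeroed,
        // hence the new `is_zero` handling in mi_segment_alloc above
        memset(p, 0, size);
      }
      *memid_out = memid;
      return p;
    }

    // Freeing must hand the same memid back so the arena can distinguish its own
    // blocks from direct OS allocations:
    //   _mi_arena_free(p, size, memid, tld->stats);

Note that mi_segment_alloc in the patch only clears the segment meta-data prefix and slice array rather than the whole block; the full memset here just keeps the example short.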