From d36d04b4a6e5ada99fa36447332e5d7d3b1d33be Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 15:35:10 -0700 Subject: [PATCH 001/179] add arena for huge page management --- ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj | 1 + include/mimalloc-internal.h | 1 + src/arena.c | 369 +++++++++++++++++++++++++++ src/memory.c | 80 ++++-- src/os.c | 4 +- 6 files changed, 435 insertions(+), 21 deletions(-) create mode 100644 src/arena.c diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 96a8924f..09fd37fb 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 28e96d71..1fabff5e 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index ccf12a06..2b881ac9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -57,6 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); + // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 00000000..5f33965a --- /dev/null +++ b/src/arena.c @@ -0,0 +1,369 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" +#include "mimalloc-atomic.h" + +#include // memset + +// os.c +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + +/* ----------------------------------------------------------- + Arena allocation +----------------------------------------------------------- */ + +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 16MiB +#define MI_MAX_ARENAS (64) + +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; + +// A memory arena descriptor +typedef struct mi_arena_s { + uint8_t* start; // the start of the memory area + size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + bool is_zero_init; // is the arena zero initialized? 
+ bool is_large; // large OS page allocated + _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks + _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's +} mi_arena_t; + + +// The available arenas +static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static _Atomic(uintptr_t) mi_arena_count; // = 0 + + +/* ----------------------------------------------------------- + Arena allocations get a memory id where the lower 8 bits are + the arena index +1, and the upper bits the block index. +----------------------------------------------------------- */ + +// Use `0` as a special id for direct OS allocated memory. +#define MI_MEMID_OS 0 + +static size_t mi_memid_create(size_t arena_index, size_t block_index) { + mi_assert_internal(arena_index < 0xFE); + return ((block_index << 8) | ((arena_index+1) & 0xFF)); +} + +static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { + mi_assert_internal(memid != MI_MEMID_OS); + *arena_index = (memid & 0xFF) - 1; + *block_index = (memid >> 8); +} + +/* ----------------------------------------------------------- + Block info +----------------------------------------------------------- */ + +static bool mi_block_is_in_use(mi_block_info_t info) { + return ((info&1) != 0); +} + +static size_t mi_block_count(mi_block_info_t info) { + return (info>>1); +} + +static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { + return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); +} + + +/* ----------------------------------------------------------- + Thread safe allocation in an arena +----------------------------------------------------------- */ + +static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + // Scan linearly through all block info's + // Skipping used ranges, coalescing free ranges on demand. + mi_assert_internal(needed_bcount > 0); + mi_assert_internal(start_idx <= arena->block_count); + mi_assert_internal(end_idx <= arena->block_count); + _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; + _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; + while (block < end) { + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + size_t bcount = mi_block_count(binfo); + if (mi_block_is_in_use(binfo)) { + // in-use, skip ahead + mi_assert_internal(bcount > 0); + block += bcount; + } + else { + // free blocks + if (bcount==0) { + // optimization: + // use 0 initialized blocks at the end, to use single atomic operation + // initially to reduce contention (as we don't need to split) + if (block + needed_bcount > end) { + return NULL; // does not fit + } + else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. 
+ continue; + } + else { + // we got it: return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = arena->is_zero_init; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + + mi_assert_internal(bcount>0); + if (needed_bcount > bcount) { +#if 0 // MI_NO_ARENA_COALESCE + block += bcount; // too small, skip to the next range + continue; +#else + // too small, try to coalesce + _Atomic(mi_block_info_t)* block_next = block + bcount; + if (block_next >= end) { + return NULL; // does not fit + } + mi_block_info_t binfo_next = mi_atomic_read(block_next); + size_t bcount_next = mi_block_count(binfo_next); + if (mi_block_is_in_use(binfo_next)) { + // next block is in use, cannot coalesce + block += (bcount + bcount_next); // skip ahea over both blocks + } + else { + // next block is free, try to coalesce + // first set the next one to being used to prevent dangling ranges + if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { + // someone else got in before us.. try again + continue; + } + else { + if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance + // someone claimed/coalesced the block in the meantime + // first free the next block again.. + bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong + mi_assert(ok); UNUSED(ok); + // and try again + continue; + } + else { + // coalesced! try again + // todo: we could optimize here to immediately claim the block if the + // coalesced size is a fit instead of retrying. Keep it simple for now. + continue; + } + } + } +#endif + } + else { // needed_bcount <= bcount + mi_assert_internal(needed_bcount <= bcount); + // it fits, claim the whole block + if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { + // ouch, someone else was quicker. Try again.. + continue; + } + else { + // got it, now split off the needed part + if (needed_bcount < bcount) { + mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); + mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); + } + // return a pointer to the claimed memory + ptrdiff_t idx = (block - arena->blocks); + *is_zero = false; + *block_index = idx; + return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); + } + } + } + } + // no success + return NULL; +} + +// Try to reduce search time by starting from bottom and wrap around. 
+static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) +{ + uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); + void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); + if (p == NULL && bottom > 0) { + // try again from the start + p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); + } + if (p != NULL) { + mi_atomic_write(&arena->block_bottom, *block_index); + } + return p; +} + +/* ----------------------------------------------------------- + Arena Allocation +----------------------------------------------------------- */ + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = MI_MEMID_OS; + *is_zero = false; + bool default_large = false; + if (large==NULL) large = &default_large; // ensure `large != NULL` + + // try to allocate in an arena if the alignment is small enough + // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. + if (alignment <= MI_SEGMENT_ALIGN && + size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 12MiB (not more than 25% waste) + !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <16MiB - 24MiB> + ) + { + size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + + mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; + if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); + #if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + #endif + *memid = mi_memid_create(i, block_index); + *commit = true; // TODO: support commit on demand? 
+ *large = arena->is_large; + mi_assert_internal((uintptr_t)p % alignment == 0); + return p; + } + } + } + } + + // fall back to the OS + *is_zero = true; + *memid = MI_MEMID_OS; + return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); +} + +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); +} + +/* ----------------------------------------------------------- + Arena free +----------------------------------------------------------- */ + +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { + mi_assert_internal(size > 0 && stats != NULL); + if (p==NULL) return; + if (size==0) return; + if (memid == MI_MEMID_OS) { + // was a direct OS allocation, pass through + _mi_os_free(p, size, stats); + } + else { + // allocated in an arena + size_t arena_idx; + size_t block_idx; + mi_memid_indices(memid, &arena_idx, &block_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_assert_internal(arena != NULL); + if (arena == NULL) { + _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + mi_assert_internal(arena->block_count > block_idx); + if (arena->block_count <= block_idx) { + _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + return; + } + _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; + mi_block_info_t binfo = mi_atomic_read_relaxed(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); + if (!mi_block_is_in_use(binfo)) { + _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + return; + }; + bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); + mi_assert_internal(ok); + if (!ok) { + _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); + } + if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { + mi_atomic_write(&arena->block_bottom, block_idx); + } + } +} + +/* ----------------------------------------------------------- + Add an arena. +----------------------------------------------------------- */ + +static bool mi_arena_add(mi_arena_t* arena) { + mi_assert_internal(arena != NULL); + mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal(arena->block_count > 0); + mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); + + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + if (i >= MI_MAX_ARENAS) { + mi_atomic_subu(&mi_arena_count, 1); + return false; + } + mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + return true; +} + + +/* ----------------------------------------------------------- + Reserve a huge page arena. + TODO: improve OS api to just reserve and claim a huge + page area at once, (and return the total size). 
+----------------------------------------------------------- */ + +#include + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + size_t pages_reserved_default = 0; + if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; + int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); + if (*pages_reserved==0) return err; + size_t hsize = (*pages_reserved) * GiB; + void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); + mi_assert_internal(p != NULL); + if (p == NULL) return ENOMEM; + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); + if (arena == NULL) return ENOMEM; + arena->block_count = bcount; + arena->start = (uint8_t*)p; + arena->block_bottom = 0; + arena->is_large = true; + arena->is_zero_init = true; + memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); + mi_arena_add(arena); + return 0; +} diff --git a/src/memory.c b/src/memory.c index dd03cf95..9ab7c850 100644 --- a/src/memory.c +++ b/src/memory.c @@ -50,6 +50,12 @@ void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* sta void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); bool _mi_os_is_huge_reserved(void* p); +// arena.c +void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map @@ -87,6 +93,7 @@ typedef struct mem_region_s { volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd + size_t arena_memid; } mem_region_t; @@ -131,6 +138,30 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } +static size_t mi_memid_create(size_t idx, size_t bitidx) { + return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1; +} + +static size_t mi_memid_create_from_arena(size_t arena_memid) { + return (arena_memid << 1) | 1; +} + +static bool mi_memid_is_arena(size_t id) { + return ((id&1)==1); +} + +static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) { + if (mi_memid_is_arena(id)) { + *arena_memid = (id>>1); + return true; + } + else { + *idx = ((id>>1) / MI_REGION_MAP_BITS); + *bitidx = ((id>>1) % MI_REGION_MAP_BITS); + return false; + } +} + /* ---------------------------------------------------------------------------- Commit from a region -----------------------------------------------------------------------------*/ @@ -153,6 +184,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); bool region_large = *allow_large; + size_t arena_memid = 0; + void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, is_zero, &arena_memid, tld); + /* void* start = NULL; if (region_large) { start = 
_mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); @@ -161,6 +195,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (start == NULL) { start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); } + */ mi_assert_internal(!(region_large && !*allow_large)); if (start == NULL) { @@ -176,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit info = mi_region_info_create(start,region_large,region_commit); if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count + region->arena_memid = arena_memid; mi_atomic_increment(®ions_count); } else { @@ -183,6 +219,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // we assign it to a later slot instead (up to 4 tries). for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { + regions[idx+i].arena_memid = arena_memid; mi_atomic_increment(®ions_count); start = NULL; break; @@ -190,7 +227,8 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit } if (start != NULL) { // free it if we didn't succeed to save it to some other region - _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); } // and continue with the memory at our index info = mi_atomic_read(®ion->info); @@ -229,7 +267,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit mi_assert_internal(blocks_start != NULL); *allow_large = region_is_large; *p = blocks_start; - *id = (idx*MI_REGION_MAP_BITS) + bitidx; + *id = mi_memid_create(idx, bitidx); return true; } @@ -269,7 +307,7 @@ static inline size_t mi_bsr(uintptr_t x) { // Allocate `blocks` in a `region` at `idx` of a given `size`. // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. +// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) @@ -366,15 +404,17 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l { mi_assert_internal(id != NULL && tld != NULL); mi_assert_internal(size > 0); - *id = SIZE_MAX; + *id = 0; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` - // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) + // use direct OS allocation for huge blocks or alignment if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - *is_zero = true; - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size + size_t arena_memid = 0; + void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size + *id = mi_memid_create_from_arena(arena_memid); + return p; } // always round size to OS page size multiple (so commit/decommit go over the entire range) @@ -405,9 +445,10 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l if (p == NULL) { // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - *is_zero = true; - p = _mi_os_alloc_aligned(size, alignment, commit, large, tld); + _mi_warning_message("unable to allocate from region: size %zu\n", size); + size_t arena_memid = 0; + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *id = mi_memid_create_from_arena(arena_memid); } else { tld->region_idx = idx; // next start of search? currently not used as we use first-fit @@ -428,18 +469,19 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); if (p==NULL) return; if (size==0) return; - if (id == SIZE_MAX) { - // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); + size_t arena_memid = 0; + size_t idx = 0; + size_t bitidx = 0; + if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { + // was a direct arena allocation, pass through + _mi_arena_free(p, size, arena_memid, stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - size_t idx = (id / MI_REGION_MAP_BITS); - size_t bitidx = (id % MI_REGION_MAP_BITS); + size = _mi_align_up(size, _mi_os_page_size()); size_t blocks = mi_region_block_count(size); size_t mask = mi_region_block_mask(blocks, bitidx); mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? 
@@ -503,11 +545,11 @@ void _mi_mem_collect(mi_stats_t* stats) { m = mi_atomic_read_relaxed(®ion->map); } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); if (m == 0) { - // on success, free the whole region (unless it was huge reserved) + // on success, free the whole region bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); - if (start != NULL && !_mi_os_is_huge_reserved(start)) { - _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); } // and release mi_atomic_write(®ion->info,0); diff --git a/src/os.c b/src/os.c index 8f5afc5b..85cd1a83 100644 --- a/src/os.c +++ b/src/os.c @@ -869,13 +869,13 @@ static void mi_os_free_huge_reserved() { */ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); if (pages_reserved != NULL) *pages_reserved = 0; return ENOMEM; } #else -int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept { if (pages_reserved != NULL) *pages_reserved = 0; if (max_secs==0) return ETIMEDOUT; // timeout From aaf01620f4e878d48a4d2815bd0d894f28a5f093 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 31 Oct 2019 19:39:49 -0700 Subject: [PATCH 002/179] improve allocation of the huge OS page arena --- src/arena.c | 39 ++++++++++++------- src/os.c | 110 +++++++++++++++++----------------------------------- 2 files changed, 60 insertions(+), 89 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5f33965a..469755f2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,7 +6,16 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- +"Arenas" are fixed area's of OS memory from which we can allocate +large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to +allocate in one arena consisting of huge OS pages -- otherwise it +delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more arenas which +is sometimes needed for embedded devices or shared memory for example. + +The arena allocation needs to be thread safe and we use a lock-free scan +with on-demand coalescing. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -16,8 +25,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -338,25 +347,27 @@ static bool mi_arena_add(mi_arena_t* arena) { /* ----------------------------------------------------------- Reserve a huge page arena. - TODO: improve OS api to just reserve and claim a huge - page area at once, (and return the total size). ----------------------------------------------------------- */ - -#include +#include // ENOMEM int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { size_t pages_reserved_default = 0; if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; - int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved); - if (*pages_reserved==0) return err; - size_t hsize = (*pages_reserved) * GiB; - void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN); - mi_assert_internal(p != NULL); - if (p == NULL) return ENOMEM; + size_t hsize = 0; + void* p = NULL; + int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize); + _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved); + if (p==NULL) return err; + // err might be != 0 but that is fine, we just got less pages. + mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats); - if (arena == NULL) return ENOMEM; + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); + if (arena == NULL) { + *pages_reserved = 0; + _mi_os_free(p, hsize, &_mi_stats_main); + return ENOMEM; + } arena->block_count = bcount; arena->start = (uint8_t*)p; arena->block_bottom = 0; diff --git a/src/os.c b/src/os.c index 85cd1a83..b7bffa64 100644 --- a/src/os.c +++ b/src/os.c @@ -36,8 +36,6 @@ terms of the MIT license. A copy of the license can be found in the file large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). 
----------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_is_huge_reserved(void* p); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); @@ -184,7 +182,7 @@ void _mi_os_init() { static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) { - if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) bool err = false; #if defined(_WIN32) err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); @@ -628,7 +626,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ *is_zero = false; size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); @@ -684,7 +682,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! @@ -758,9 +756,11 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { size_t csize = 0; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return false; + /* if (_mi_os_is_huge_reserved(addr)) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } + */ int err = 0; #ifdef _WIN32 DWORD oldprotect = 0; @@ -816,79 +816,42 @@ will be reused. -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB -typedef struct mi_huge_info_s { - volatile _Atomic(void*) start; // start of huge page area (32TiB) - volatile _Atomic(size_t) reserved; // total reserved size - volatile _Atomic(size_t) used; // currently allocated -} mi_huge_info_t; - -static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) }; - -bool _mi_os_is_huge_reserved(void* p) { - return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL && - p >= mi_atomic_read_ptr(&os_huge_reserved.start) && - (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved)); -} - -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment) -{ - // only allow large aligned allocations (e.g. 
regions) - if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; - if (try_alignment > MI_SEGMENT_SIZE) return NULL; - if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL; - if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full - - // always aligned - mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0 ); - mi_assert_internal( (uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0 ); - - // try to reserve space - size_t base = mi_atomic_addu( &os_huge_reserved.used, size ); - if ((base + size) > os_huge_reserved.reserved) { - // "free" our over-allocation - mi_atomic_subu( &os_huge_reserved.used, size); - return NULL; - } - - // success! - uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base; - mi_assert_internal( (uintptr_t)p % MI_SEGMENT_SIZE == 0 ); - return p; -} - -/* -static void mi_os_free_huge_reserved() { - uint8_t* addr = os_huge_reserved.start; - size_t total = os_huge_reserved.reserved; - os_huge_reserved.reserved = 0; - os_huge_reserved.start = NULL; - for( size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); - } -} -*/ #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept { UNUSED(pages); UNUSED(max_secs); + if (start != NULL) *start = NULL; if (pages_reserved != NULL) *pages_reserved = 0; + if (size != NULL) *size = 0; return ENOMEM; } #else -int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept +static _Atomic(uintptr_t) huge_top; // = 0 + +int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept { - if (pages_reserved != NULL) *pages_reserved = 0; + *pstart = NULL; + *pages_reserved = 0; + *psize = 0; if (max_secs==0) return ETIMEDOUT; // timeout if (pages==0) return 0; // ok - if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved - // Set the start address after the 32TiB area - uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif + // Atomically claim a huge address range + size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + uint8_t* start; + do { + start = (uint8_t*)mi_atomic_addu(&huge_top, size); + if (start == NULL) { + uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + mi_atomic_cas_strong(&huge_top, top, 0); + } + } while (start == NULL); + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if 
it takes too long double start_t = _mi_clock_start(); @@ -925,16 +888,13 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_r } // success, record it if (page==0) { - mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes - mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE); + *pstart = addr; } - else { - mi_atomic_addu(&os_huge_reserved.reserved,MI_HUGE_OS_PAGE_SIZE); - } - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + *psize += MI_HUGE_OS_PAGE_SIZE; + *pages_reserved += 1; + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - if (pages_reserved != NULL) { *pages_reserved = page + 1; } - + // check for timeout double elapsed = _mi_clock_end(start_t); if (elapsed > max_secs) return ETIMEDOUT; @@ -943,7 +903,7 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_r if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout } } - _mi_verbose_message("reserved %zu huge pages\n", pages); + mi_assert_internal(*psize == size); return 0; } #endif From a6499be074a52232ed131eeabb3bd8040f2743c3 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 19:53:07 -0700 Subject: [PATCH 003/179] initial numa support for arenas --- include/mimalloc-internal.h | 1 + include/mimalloc.h | 8 +- src/arena.c | 128 +++++++++++++----- src/init.c | 2 +- src/options.c | 3 +- src/os.c | 252 +++++++++++++++++++----------------- 6 files changed, 241 insertions(+), 153 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2b881ac9..dd677a02 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -56,6 +56,7 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); +int _mi_os_numa_node(void); // memory.c diff --git a/include/mimalloc.h b/include/mimalloc.h index b63ed79d..b155aca6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -228,9 +228,14 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // Experimental mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; + +// deprecated +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + // ------------------------------------------------------ // Convenience // ------------------------------------------------------ @@ -271,6 +276,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_segment_reset, mi_option_os_tag, + mi_option_max_numa_node, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 469755f2..5bc3900c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -25,8 +25,10 @@ with on-demand coalescing. 
// os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; +//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); +int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -44,6 +46,7 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks @@ -223,7 +226,31 @@ static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_ze Arena Allocation ----------------------------------------------------------- */ -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, + size_t* memid) +{ + size_t block_index = SIZE_MAX; + void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); + if (p != NULL) { + mi_assert_internal(block_index != SIZE_MAX); +#if MI_DEBUG>=1 + _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; + mi_block_info_t binfo = mi_atomic_read(block); + mi_assert_internal(mi_block_is_in_use(binfo)); + mi_assert_internal(mi_block_count(binfo) >= needed_bcount); +#endif + *memid = mi_memid_create(arena_index, block_index); + *commit = true; // TODO: support commit on demand? + *large = arena->is_large; + } + return p; +} + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) +{ mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; @@ -240,33 +267,36 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* { size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + int numa_node = _mi_os_numa_node(); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); - if (arena==NULL) break; - if (*large || !arena->is_large) { // large OS pages allowed, or arena is not large OS pages - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, bcount, is_zero, &block_index); - if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - #endif - *memid = mi_memid_create(i, block_index); - *commit = true; // TODO: support commit on demand? 
- *large = arena->is_large; - mi_assert_internal((uintptr_t)p % alignment == 0); - return p; - } + if (arena==NULL) break; // end reached + if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + // try from another numa node instead.. + for (size_t i = 0; i < MI_MAX_ARENAS; i++) { + mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + if (arena==NULL) break; // end reached + if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! + (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; } } } - // fall back to the OS + // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); @@ -350,31 +380,61 @@ static bool mi_arena_add(mi_arena_t* arena) { ----------------------------------------------------------- */ #include // ENOMEM -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - size_t pages_reserved_default = 0; - if (pages_reserved==NULL) pages_reserved = &pages_reserved_default; +// reserve at a specific numa node +static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; - void* p = NULL; - int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize); - _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved); - if (p==NULL) return err; - // err might be != 0 but that is fine, we just got less pages. - mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); + if (p==NULL) return ENOMEM; + _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); + size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - *pages_reserved = 0; _mi_os_free(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; arena->start = (uint8_t*)p; arena->block_bottom = 0; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); - //mi_atomic_write(&arena->blocks[0], mi_block_info_create(bcount, false)); mi_arena_add(arena); return 0; } + + +// reserve huge pages evenly among all numa nodes. 
+int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { + if (pages == 0) return 0; + + // pages per numa node + int numa_count = _mi_os_numa_node_count(); + if (numa_count <= 0) numa_count = 1; + size_t pages_per = pages / numa_count; + if (pages_per == 0) pages_per = 1; + + // reserve evenly among numa nodes + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + int err = mi_reserve_huge_os_pages_at((pages_per > pages ? pages : pages_per), numa_node); + if (err) return err; + if (pages < pages_per) { + pages = 0; + } + else { + pages -= pages_per; + } + } + + return 0; +} + +int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + if (pages_reserved != NULL) *pages_reserved = 0; + int err = mi_reserve_huge_os_pages_interleave(pages); + if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; + return err; +} diff --git a/src/init.c b/src/init.c index e15d82eb..138b54aa 100644 --- a/src/init.c +++ b/src/init.c @@ -435,7 +435,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages(pages, max_secs, NULL); + mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/options.c b/src/options.c index a49c46ed..32f13d54 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index b7bffa64..c0564174 100644 --- a/src/os.c +++ b/src/os.c @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = (1UL << 21); // 2MiB + large_os_page_size = 2*MiB; } } #endif @@ -207,31 +207,6 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */ - && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0 - && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) - && pNtAllocateVirtualMemoryEx != NULL) - { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif - MEM_EXTENDED_PARAMETER param = { 0, 0 }; - param.Type = 5; // == MemExtendedParameterAttributeFlags; - param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - SIZE_T psize = size; - void* base = addr; - 
NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, ¶m, 1); - if (err == 0) { - return base; - } - else { - // else fall back to regular large OS pages - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err); - } - } -#endif #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; @@ -364,7 +339,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % ((uintptr_t)1 << 30)) == 0) { + if ((size % GiB) == 0) { lflags |= MAP_HUGE_1GB; } else @@ -400,10 +375,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE + // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow TPH if called with explicit `madvise`, so + // However, some systems only allow THP if called with explicit `madvise`, so // when large OS pages are enabled for mimalloc, we call `madvice` anyways. if (allow_large && use_large_os_page(size, try_alignment)) { if (madvise(p, size, MADV_HUGEPAGE) == 0) { @@ -810,101 +785,146 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- -Support for huge OS pages (1Gib) that are reserved up-front and never -released. Only regions are allocated in here (see `memory.c`) so the memory -will be reused. +Support for allocating huge OS pages (1Gib) that are reserved up-front +and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB +#define MI_HUGE_OS_PAGE_SIZE (GiB) +#if defined(WIN32) && (MI_INTPTR_SIZE >= 8) +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +{ + mi_assert_internal(size%GiB == 0); -#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept { - UNUSED(pages); UNUSED(max_secs); - if (start != NULL) *start = NULL; - if (pages_reserved != NULL) *pages_reserved = 0; - if (size != NULL) *size = 0; - return ENOMEM; -} -#else -static _Atomic(uintptr_t) huge_top; // = 0 - -int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept -{ - *pstart = NULL; - *pages_reserved = 0; - *psize = 0; - if (max_secs==0) return ETIMEDOUT; // timeout - if (pages==0) return 0; // ok - - // Atomically claim a huge address range - size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - uint8_t* start; - do { - start = (uint8_t*)mi_atomic_addu(&huge_top, size); - if (start == NULL) { - uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); - top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif - mi_atomic_cas_strong(&huge_top, top, 0); - } - } while (start == NULL); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} }; + MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0}; + reqs.HighestEndingAddress = NULL; + reqs.LowestStartingAddress = NULL; + reqs.Alignment = MI_SEGMENT_SIZE; - // Allocate one page at the time but try to place them contiguously - // We allocate one page at the time to be able to abort if it takes too long - double start_t = _mi_clock_start(); - uint8_t* addr = start; // current top of the allocations - for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) { - // allocate a page - void* p = NULL; - bool is_large = true; - #ifdef _WIN32 - if (page==0) { mi_win_enable_large_os_pages(); } - p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large); - #elif defined(MI_OS_USE_MMAP) - p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large); - #else - // always fail - #endif - - // Did we succeed at a contiguous address? 
- if (p != addr) { - // no success, issue a warning and return with an error - if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main ); - } - else { - #ifdef _WIN32 - int err = GetLastError(); - #else - int err = errno; - #endif - _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err); - } - return ENOMEM; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + if (pNtAllocateVirtualMemoryEx != NULL) { + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif + params[0].Type = MemExtendedParameterAddressRequirements; + params[0].Pointer = &reqs; + params[1].Type = 5; // == MemExtendedParameterAttributeFlags; + params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + size_t param_count = 2; + if (numa_node >= 0) { + param_count++; + params[2].Type = MemExtendedParameterNumaNode; + params[2].ULong = (unsigned)numa_node; } - // success, record it - if (page==0) { - *pstart = addr; + SIZE_T psize = size; + void* base = NULL; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0) { + return base; } - *psize += MI_HUGE_OS_PAGE_SIZE; - *pages_reserved += 1; - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - - // check for timeout - double elapsed = _mi_clock_end(start_t); - if (elapsed > max_secs) return ETIMEDOUT; - if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout + else { + // fall back to regular huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); } } - mi_assert_internal(*psize == size); - return 0; + // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation + if (pVirtualAlloc2 != NULL) { + params[0].Type = MemExtendedParameterAddressRequirements; + params[0].Pointer = &reqs; + size_t param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type = MemExtendedParameterNumaNode; + params[1].ULong = (unsigned)numa_node; + } + return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); + } + #endif + return NULL; // give up on older Windows.. 
+} +#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) +#ifdef MI_HAS_NUMA +#include // mbind, and use -lnuma +#endif +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { + mi_assert_internal(size%GiB == 0); + bool is_large = true; + void* p = mi_unix_mmap(NULL, MI_HUGE_OS_PAGE_SIZE, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (p == NULL) return NULL; + #ifdef MI_HAS_NUMA + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { + uintptr_t numa_mask = (1UL << numa_node); + long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); + } + } + #endif + return p; +} +#else +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { + return NULL; } #endif +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { + if (psize != NULL) *psize = 0; + size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + void* p = mi_os_alloc_huge_os_pagesx(size, numa_node); + if (p==NULL) return NULL; + if (psize != NULL) *psize = size; + _mi_stat_increase(&_mi_stats_main.committed, size); + _mi_stat_increase(&_mi_stats_main.reserved, size); + return p; +} + +#ifdef WIN32 +static int mi_os_numa_nodex(void) { + PROCESSOR_NUMBER pnum; + USHORT numa_node = 0; + GetCurrentProcessorNumberEx(&pnum); + GetNumaProcessorNodeEx(&pnum,&numa_node); + return (int)numa_node; +} + +static int mi_os_numa_node_countx(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + return (int)(numa_max + 1); +} +#elif MI_HAS_NUMA +#include +static int mi_os_numa_nodex(void) { + return numa_preferred(); +} +static int mi_os_numa_node_countx(void) { + return (numa_max_node() + 1); +} +#else +static int mi_os_numa_nodex(void) { + return 0; +} +static int mi_os_numa_node_countx(void) { + return 1; +} +#endif + +int _mi_os_numa_node_count(void) { + long ncount = mi_os_numa_node_countx(); + // never more than max numa node and at least 1 + long nmax = 1 + mi_option_get(mi_option_max_numa_node); + if (ncount > nmax) ncount = nmax; + if (ncount <= 0) ncount = 1; + return ncount; +} + +int _mi_os_numa_node(void) { + int nnode = mi_os_numa_nodex(); + // never more than the node count + int ncount = _mi_os_numa_node_count(); + if (nnode >= ncount) { nnode = nnode % ncount; } + return nnode; +} From 3fadf4abaf5ee91c38c6e593a1faabb28d9ab2f9 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:01:08 -0700 Subject: [PATCH 004/179] initial numa awareness for regions --- src/memory.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/memory.c b/src/memory.c index 9ab7c850..02e82e4d 100644 --- a/src/memory.c +++ b/src/memory.c @@ -45,10 +45,8 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -bool _mi_os_is_huge_reserved(void* p); +//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +//void _mi_os_free_ex(void* p, size_t size, bool 
was_committed, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); @@ -93,7 +91,8 @@ typedef struct mem_region_s { volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd - size_t arena_memid; + volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -212,6 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count region->arena_memid = arena_memid; + mi_atomic_write(®ion->numa_node, _mi_os_numa_node() + 1); mi_atomic_increment(®ions_count); } else { @@ -220,6 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node() + 1); mi_atomic_increment(®ions_count); start = NULL; break; @@ -365,15 +366,18 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); mem_region_t* region = ®ions[idx]; uintptr_t m = mi_atomic_read_relaxed(®ion->map); - if (m != MI_REGION_MAP_FULL) { // some bits are zero + int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + if ((rnode < 0 || rnode == numa_node) && // fits current numa node + (m != MI_REGION_MAP_FULL)) // and some bits are zero + { bool ok = (*commit || *allow_large); // committing or allow-large is always ok if (!ok) { // otherwise skip incompatible regions if possible. @@ -426,19 +430,20 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks + int numa_node = _mi_os_numa_node(); void* p = NULL; size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error if (p != NULL) break; } if (p == NULL) { // no free range in existing regions -- try to extend beyond the count.. 
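A short aside on the `numa_node + 1` encoding introduced for regions above: storing the node shifted by one means a zero-initialized region is still "unassociated" and matches any node. A minimal sketch with illustrative (non-mimalloc) names:

#include <stdbool.h>
#include <stdint.h>

typedef struct region_s {
  uintptr_t numa_node;   // 0 = no association yet, otherwise node index + 1
} region_t;              // (the real field is read and written atomically)

static bool region_fits_node(const region_t* region, int numa_node) {
  int rnode = (int)region->numa_node - 1;      // decode back to -1 .. n-1
  return (rnode < 0 || rnode == numa_node);    // unassociated, or same node
}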
but at most 8 regions for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error + if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error if (p != NULL) break; } } From 2d10c78587d6cf781ffb40c24cb727ecff625841 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:19:00 -0700 Subject: [PATCH 005/179] fix linux compilation --- CMakeLists.txt | 1 + src/arena.c | 3 ++- src/init.c | 2 +- src/os.c | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 81cc339a..e9eb6feb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources src/stats.c src/os.c + src/arena.c src/memory.c src/segment.c src/page.c diff --git a/src/arena.c b/src/arena.c index 5bc3900c..bb1c1c10 100644 --- a/src/arena.c +++ b/src/arena.c @@ -381,7 +381,7 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -static int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); if (p==NULL) return ENOMEM; @@ -432,6 +432,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { } int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { + UNUSED(max_secs); _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages); diff --git a/src/init.c b/src/init.c index 138b54aa..0813fddd 100644 --- a/src/init.c +++ b/src/init.c @@ -434,7 +434,7 @@ static void mi_process_load(void) { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/os.c b/src/os.c index c0564174..2bb3ee3c 100644 --- a/src/os.c +++ b/src/os.c @@ -851,7 +851,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; - void* p = mi_unix_mmap(NULL, MI_HUGE_OS_PAGE_SIZE, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { @@ -861,6 +861,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } + #else + UNUSED(numa_node); #endif return p; } From 57dd69265ad294e7cdfcc13ef7ecb69b7c5d61b1 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 20:30:01 -0700 Subject: [PATCH 006/179] normalize numa node --- src/arena.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arena.c b/src/arena.c index bb1c1c10..381d4486 100644 --- a/src/arena.c +++ 
b/src/arena.c @@ -383,6 +383,8 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { size_t hsize = 0; + if (numa_node < -1) numa_node = -1; + if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); if (p==NULL) return ENOMEM; _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); From 2c12d7f2234b25308478e22c9342a07623b6f891 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 1 Nov 2019 22:01:52 -0700 Subject: [PATCH 007/179] optimized numa calls; better Linux support --- CMakeLists.txt | 12 ++++ include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 1 + src/arena.c | 2 +- src/init.c | 3 +- src/memory.c | 6 +- src/os.c | 114 ++++++++++++++++++++++++------------ 7 files changed, 97 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9eb6feb..1e96c237 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -88,6 +90,16 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() +CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H) +if(MI_HAVE_NUMA_H) + list(APPEND mi_defines MI_HAS_NUMA) + list(APPEND mi_libraries numa) +else() + if (NOT(WIN32)) + message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") + endif() +endif() + # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index dd677a02..b4d3351d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -56,7 +56,7 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(void); +int _mi_os_numa_node(mi_os_tld_t* tld); // memory.c diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..0208d5c7 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -413,6 +413,7 @@ typedef struct mi_segments_tld_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation + int numa_node; // numa node associated with this thread mi_stats_t* stats; // points to tld stats } mi_os_tld_t; diff --git a/src/arena.c b/src/arena.c index 381d4486..7eb755c4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -267,7 +267,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(); // current numa node + int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation diff --git a/src/init.c b/src/init.c index 0813fddd..166ca451 100644 --- a/src/init.c +++ b/src/init.c @@ -99,7 +99,7 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, 
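The clamping of the requested node above keeps user-supplied values in range; as a minimal sketch (hypothetical helper, where -1 means "any node"):

static int normalize_numa_node(int numa_node, int node_count) {
  if (numa_node < -1) numa_node = -1;                      // -1 == no preference
  if (numa_node >= 0) numa_node = numa_node % node_count;  // wrap into the valid range
  return numa_node;
}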
// os + { 0, -1, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +218,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->os.numa_node = -1; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 02e82e4d..a425393c 100644 --- a/src/memory.c +++ b/src/memory.c @@ -211,7 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit if (mi_atomic_cas_strong(®ion->info, info, 0)) { // update the region count region->arena_memid = arena_memid; - mi_atomic_write(®ion->numa_node, _mi_os_numa_node() + 1); + mi_atomic_write(®ion->numa_node, _mi_os_numa_node(tld) + 1); mi_atomic_increment(®ions_count); } else { @@ -220,7 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node() + 1); + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); mi_atomic_increment(®ions_count); start = NULL; break; @@ -430,7 +430,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks - int numa_node = _mi_os_numa_node(); + int numa_node = _mi_os_numa_node(tld); void* p = NULL; size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? diff --git a/src/os.c b/src/os.c index 2bb3ee3c..677d0ea2 100644 --- a/src/os.c +++ b/src/os.c @@ -97,7 +97,7 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T* static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -static bool mi_win_enable_large_os_pages() +static bool mi_win_enable_large_os_pages() { if (large_os_page_size > 0) return true; @@ -148,10 +148,10 @@ void _mi_os_init(void) { FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { + if (hDll != NULL) { pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); FreeLibrary(hDll); - } + } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { mi_win_enable_large_os_pages(); } @@ -191,7 +191,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats #else err = (munmap(addr, size) == -1); #endif - if (was_committed) _mi_stat_decrease(&stats->committed, size); + if (was_committed) _mi_stat_decrease(&stats->committed, size); _mi_stat_decrease(&stats->reserved, size); if (err) { #pragma warning(suppress:4996) @@ -207,14 +207,14 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on 
modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { MEM_ADDRESS_REQUIREMENTS reqs = { 0 }; @@ -232,7 +232,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, mi_assert_internal(!(large_only && !allow_large)); static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; - if ((large_only || use_large_os_page(size, try_alignment)) + if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { @@ -372,7 +372,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } if (p == NULL) { *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); + p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE @@ -391,7 +391,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro } #endif -// On 64-bit systems, we can do efficient aligned allocation by using +// On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) static volatile _Atomic(intptr_t) aligned_base; @@ -785,14 +785,14 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- -Support for allocating huge OS pages (1Gib) that are reserved up-front +Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE (GiB) +#define MI_HUGE_OS_PAGE_SIZE (GiB) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) -{ +static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +{ mi_assert_internal(size%GiB == 0); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) @@ -802,8 +802,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) reqs.HighestEndingAddress = NULL; reqs.LowestStartingAddress = NULL; reqs.Alignment = MI_SEGMENT_SIZE; - - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages if (pNtAllocateVirtualMemoryEx != NULL) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) @@ -825,10 +825,10 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // fall back to regular huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); } - } + } // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation if (pVirtualAlloc2 != NULL) { params[0].Type = MemExtendedParameterAddressRequirements; @@ -842,7 +842,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); } #endif - return NULL; // give up on older Windows.. + return NULL; // give up on older Windows.. } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) #ifdef MI_HAS_NUMA @@ -853,7 +853,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { bool is_large = true; void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA + #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { uintptr_t numa_mask = (1UL << numa_node); long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -866,7 +866,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { #endif return p; } -#else +#else static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { return NULL; } @@ -884,12 +884,12 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { } #ifdef WIN32 -static int mi_os_numa_nodex(void) { +static int mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return (int)numa_node; } static int mi_os_numa_node_countx(void) { @@ -898,12 +898,42 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif MI_HAS_NUMA -#include +#include +#include +#include static int mi_os_numa_nodex(void) { - return numa_preferred(); + #define MI_MAX_MASK (4) // support at most 256 nodes + unsigned long mask[MI_MAX_MASK]; + memset(mask,0,MI_MAX_MASK*sizeof(long)); + int mode = 0; + long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */); + if (err != 0) return 0; + // find the lowest bit that is set + for(int i = 0; i < MI_MAX_MASK; i++) { + for(int j = 0; j < (int)(sizeof(long)*8); j++) { + if ((mask[i] & (1UL << j)) != 0) { + return (i*sizeof(long)*8 + j); + } + } + } + 
return 0; } + static int mi_os_numa_node_countx(void) { - return (numa_max_node() + 1); + DIR* d = opendir("/sys/devices/system/node"); + if (d==NULL) return 1; + + struct dirent* de; + int max_node_num = 0; + while ((de = readdir(d)) != NULL) { + int node_num; + if (strncmp(de->d_name, "node", 4) == 0) { + node_num = (int)strtol(de->d_name+4, NULL, 0); + if (max_node_num < node_num) max_node_num = node_num; + } + } + closedir(d); + return (max_node_num + 1); } #else static int mi_os_numa_nodex(void) { @@ -915,18 +945,28 @@ static int mi_os_numa_node_countx(void) { #endif int _mi_os_numa_node_count(void) { - long ncount = mi_os_numa_node_countx(); - // never more than max numa node and at least 1 - long nmax = 1 + mi_option_get(mi_option_max_numa_node); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - return ncount; + static int numa_node_count = 0; + if (mi_unlikely(numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); + // never more than max numa node and at least 1 + int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + if (ncount > nmax) ncount = nmax; + if (ncount <= 0) ncount = 1; + numa_node_count = ncount; + } + mi_assert_internal(numa_node_count >= 1); + return numa_node_count; } -int _mi_os_numa_node(void) { - int nnode = mi_os_numa_nodex(); - // never more than the node count - int ncount = _mi_os_numa_node_count(); - if (nnode >= ncount) { nnode = nnode % ncount; } - return nnode; +int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_unlikely(tld->numa_node < 0)) { + int nnode = mi_os_numa_nodex(); + // never more than the node count + int ncount = _mi_os_numa_node_count(); + if (nnode >= ncount) { nnode = nnode % ncount; } + if (nnode < 0) nnode = 0; + tld->numa_node = nnode; + } + mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count()); + return tld->numa_node; } From a69016c33e5969b07426669b58e6a927c478c308 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 10:30:16 -0700 Subject: [PATCH 008/179] improve and document numa support --- src/os.c | 39 +++++++++++++++++++++++++++++---------- test/main-override.cpp | 2 +- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/os.c b/src/os.c index 677d0ea2..fc89d642 100644 --- a/src/os.c +++ b/src/os.c @@ -854,8 +854,11 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? 
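The Linux node count above is derived by scanning sysfs for node<N> directories; a self-contained sketch of that scan (same idea, simplified, returning 1 when sysfs is unavailable):

#include <dirent.h>
#include <stdlib.h>
#include <string.h>

static int count_numa_nodes(void) {
  DIR* d = opendir("/sys/devices/system/node");
  if (d == NULL) return 1;
  int max_node = 0;
  struct dirent* de;
  while ((de = readdir(d)) != NULL) {
    if (strncmp(de->d_name, "node", 4) == 0) {        // entries look like "node0", "node1", ...
      int n = (int)strtol(de->d_name + 4, NULL, 10);
      if (n > max_node) max_node = n;
    }
  }
  closedir(d);
  return max_node + 1;
}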
+ // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); @@ -883,6 +886,9 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { return p; } +/* ---------------------------------------------------------------------------- +Support NUMA aware allocation +-----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; @@ -902,6 +908,9 @@ static int mi_os_numa_node_countx(void) { #include #include static int mi_os_numa_nodex(void) { + #define MI_NUMA_NODE_SLOW // too slow, so cache it + // TODO: perhaps use RDTSCP instruction on x64? + // see #define MI_MAX_MASK (4) // support at most 256 nodes unsigned long mask[MI_MAX_MASK]; memset(mask,0,MI_MAX_MASK*sizeof(long)); @@ -945,7 +954,7 @@ static int mi_os_numa_node_countx(void) { #endif int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; + static int numa_node_count = 0; // cache the node count if (mi_unlikely(numa_node_count <= 0)) { int ncount = mi_os_numa_node_countx(); // never more than max numa node and at least 1 @@ -959,14 +968,24 @@ int _mi_os_numa_node_count(void) { } int _mi_os_numa_node(mi_os_tld_t* tld) { + int numa_node; +#ifndef MI_NUMA_NODE_SLOW + UNUSED(tld); + numa_node = mi_os_numa_nodex(); +#else if (mi_unlikely(tld->numa_node < 0)) { - int nnode = mi_os_numa_nodex(); - // never more than the node count - int ncount = _mi_os_numa_node_count(); - if (nnode >= ncount) { nnode = nnode % ncount; } - if (nnode < 0) nnode = 0; - tld->numa_node = nnode; + // Cache the NUMA node of the thread if the call is slow. + // This may not be correct as threads can migrate to another cpu on + // another node -- however, for memory allocation this just means we keep + // using the same 'node id' for its allocations; new OS allocations + // naturally come from the actual node so in practice this may be fine. + tld->numa_node = mi_os_numa_nodex(); } - mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count()); - return tld->numa_node; + numa_node = tld->numa_node +#endif + // never more than the node count and >= 0 + int numa_count = _mi_os_numa_node_count(); + if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } + if (numa_node < 0) numa_node = 0; + return numa_node; } diff --git a/test/main-override.cpp b/test/main-override.cpp index e006ad27..f7a7f1bd 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -24,7 +24,7 @@ public: int main() { - //mi_stats_reset(); // ignore earlier allocations + mi_stats_reset(); // ignore earlier allocations atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); From 70748ee1ee1da3e9ad14c2d751623e47cb3fd287 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 10:39:09 -0700 Subject: [PATCH 009/179] fix missing semi colon --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index fc89d642..c41d028f 100644 --- a/src/os.c +++ b/src/os.c @@ -981,7 +981,7 @@ int _mi_os_numa_node(mi_os_tld_t* tld) { // naturally come from the actual node so in practice this may be fine. 
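For the preferred-node binding used in the mmap path above, a minimal sketch; it assumes libnuma's headers are installed (numaif.h from libnuma-dev, linked with -lnuma, which is what the MI_HAS_NUMA CMake check enables) and a node index below 64:

#include <numaif.h>   // mbind, MPOL_PREFERRED
#include <stddef.h>

static int bind_preferred_node(void* p, size_t size, int numa_node) {
  unsigned long mask = 1UL << numa_node;   // single-node mask, assumes numa_node < 64
  // MPOL_PREFERRED: allocate on this node when possible, fall back otherwise.
  return (int)mbind(p, size, MPOL_PREFERRED, &mask, sizeof(mask) * 8, 0);
}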
tld->numa_node = mi_os_numa_nodex(); } - numa_node = tld->numa_node + numa_node = tld->numa_node; #endif // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); From fd9d8c85ae40db95feb51da6e5478850bc6722fc Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 11:55:03 -0700 Subject: [PATCH 010/179] change numa support on linux to use getcpu --- include/mimalloc-types.h | 1 - src/init.c | 5 ++--- src/os.c | 45 +++++++++++----------------------------- 3 files changed, 14 insertions(+), 37 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0208d5c7..99b6b22b 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -413,7 +413,6 @@ typedef struct mi_segments_tld_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - int numa_node; // numa node associated with this thread mi_stats_t* stats; // points to tld stats } mi_os_tld_t; diff --git a/src/init.c b/src/init.c index 166ca451..ed15aeba 100644 --- a/src/init.c +++ b/src/init.c @@ -99,8 +99,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, -1, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -218,7 +218,6 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; - tld->os.numa_node = -1; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/os.c b/src/os.c index c41d028f..8e1b3e91 100644 --- a/src/os.c +++ b/src/os.c @@ -903,29 +903,21 @@ static int mi_os_numa_node_countx(void) { GetNumaHighestNodeNumber(&numa_max); return (int)(numa_max + 1); } -#elif MI_HAS_NUMA +#elif defined(__linux__) #include #include -#include +#include + static int mi_os_numa_nodex(void) { - #define MI_NUMA_NODE_SLOW // too slow, so cache it - // TODO: perhaps use RDTSCP instruction on x64? - // see - #define MI_MAX_MASK (4) // support at most 256 nodes - unsigned long mask[MI_MAX_MASK]; - memset(mask,0,MI_MAX_MASK*sizeof(long)); - int mode = 0; - long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */); +#ifdef SYS_getcpu + unsigned node = 0; + unsigned ncpu = 0; + int err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - // find the lowest bit that is set - for(int i = 0; i < MI_MAX_MASK; i++) { - for(int j = 0; j < (int)(sizeof(long)*8); j++) { - if ((mask[i] & (1UL << j)) != 0) { - return (i*sizeof(long)*8 + j); - } - } - } - return 0; + return (int)node; +#else + return 0; +#endif } static int mi_os_numa_node_countx(void) { @@ -968,21 +960,8 @@ int _mi_os_numa_node_count(void) { } int _mi_os_numa_node(mi_os_tld_t* tld) { - int numa_node; -#ifndef MI_NUMA_NODE_SLOW UNUSED(tld); - numa_node = mi_os_numa_nodex(); -#else - if (mi_unlikely(tld->numa_node < 0)) { - // Cache the NUMA node of the thread if the call is slow. - // This may not be correct as threads can migrate to another cpu on - // another node -- however, for memory allocation this just means we keep - // using the same 'node id' for its allocations; new OS allocations - // naturally come from the actual node so in practice this may be fine. 
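The getcpu-based lookup above replaces the much slower get_mempolicy query; a standalone sketch that falls back to node 0 where SYS_getcpu is unavailable:

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>

static int current_numa_node(void) {
#ifdef SYS_getcpu
  unsigned cpu = 0, node = 0;
  if (syscall(SYS_getcpu, &cpu, &node, NULL) != 0) return 0;  // third argument is unused
  return (int)node;
#else
  return 0;
#endif
}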
- tld->numa_node = mi_os_numa_nodex(); - } - numa_node = tld->numa_node; -#endif + int numa_node = mi_os_numa_nodex(); // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From ee323aabac42ab4333e40cedd02f0eb1d4356b4e Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 15:56:21 -0700 Subject: [PATCH 011/179] fix vs2017 build --- ide/vs2017/mimalloc-override.vcxproj | 1 + ide/vs2017/mimalloc-override.vcxproj.filters | 3 +++ ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 +++ src/os.c | 5 +++-- 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 511c0fab..1fc70b33 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -231,6 +231,7 @@ + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 6ac0c0b5..75a8e032 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -70,5 +70,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 6147c349..484c4db8 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -217,6 +217,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index a2b64314..598b8643 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -53,6 +53,9 @@ Source Files + + Source Files + diff --git a/src/os.c b/src/os.c index 8e1b3e91..4aa4abf3 100644 --- a/src/os.c +++ b/src/os.c @@ -794,6 +794,7 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); + mi_win_enable_large_os_pages(); #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; @@ -812,7 +813,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) params[0].Pointer = &reqs; params[1].Type = 5; // == MemExtendedParameterAttributeFlags; params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - size_t param_count = 2; + ULONG param_count = 2; if (numa_node >= 0) { param_count++; params[2].Type = MemExtendedParameterNumaNode; @@ -833,7 +834,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) if (pVirtualAlloc2 != NULL) { params[0].Type = MemExtendedParameterAddressRequirements; params[0].Pointer = &reqs; - size_t param_count = 1; + ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type = MemExtendedParameterNumaNode; From 62cd0237fc8583f357fe4599889011f845690af1 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 2 Nov 2019 17:49:34 -0700 Subject: [PATCH 012/179] fix aligned huge page allocation on windows --- src/arena.c | 2 +- src/os.c | 118 +++++++++++++++++++++++++++++++++------------------- 2 files changed, 76 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7eb755c4..56b09859 100644 --- a/src/arena.c +++ b/src/arena.c @@ -435,7 +435,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { UNUSED(max_secs); - _mi_verbose_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); + _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; diff --git a/src/os.c b/src/os.c index 4aa4abf3..e1dc31f8 100644 --- a/src/os.c +++ b/src/os.c @@ -791,68 +791,68 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #define MI_HUGE_OS_PAGE_SIZE (GiB) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); + mi_assert_internal(addr != NULL); + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + mi_win_enable_large_os_pages(); - + + void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - MEM_EXTENDED_PARAMETER params[4] = { {0,0},{0,0},{0,0},{0,0} }; - MEM_ADDRESS_REQUIREMENTS reqs = {0,0,0}; - reqs.HighestEndingAddress = NULL; - reqs.LowestStartingAddress = NULL; - reqs.Alignment = MI_SEGMENT_SIZE; - + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages if (pNtAllocateVirtualMemoryEx != NULL) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif - params[0].Type = MemExtendedParameterAddressRequirements; - params[0].Pointer = &reqs; - params[1].Type = 5; // == MemExtendedParameterAttributeFlags; - params[1].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 2; - if (numa_node >= 0) { - param_count++; - params[2].Type = MemExtendedParameterNumaNode; - params[2].ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = NULL; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0) { - return base; - } - else { - // fall back to regular huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for aligned large OS page allocation - if (pVirtualAlloc2 != NULL) { - params[0].Type = MemExtendedParameterAddressRequirements; - params[0].Pointer = &reqs; + params[0].Type = 5; // == MemExtendedParameterAttributeFlags; + params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; if (numa_node >= 0) { param_count++; params[1].Type = MemExtendedParameterNumaNode; params[1].ULong = (unsigned)numa_node; } - return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count); + SIZE_T psize = size; + void* base = addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type = MemExtendedParameterNumaNode; + params[0].ULong = (unsigned)numa_node; + p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + } + else #endif - return NULL; // give up on older Windows.. 
+ // use regular virtual alloc on older windows + { + p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); + } + + if (p == NULL) { + _mi_warning_message("failed to allocate huge OS pages (size %zu) (error %d)\n", size, GetLastError()); + } + return p; } + #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) #ifdef MI_HAS_NUMA #include // mbind, and use -lnuma #endif -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; - void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes @@ -871,19 +871,51 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { return p; } #else -static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) { +static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { return NULL; } #endif +// To ensure proper alignment, use our own area for huge OS pages +static _Atomic(uintptr_t) mi_huge_start; // = 0 + +// Allocate MI_SEGMENT_SIZE aligned huge pages void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { if (psize != NULL) *psize = 0; - size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - void* p = mi_os_alloc_huge_os_pagesx(size, numa_node); - if (p==NULL) return NULL; - if (psize != NULL) *psize = size; + const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; + + // Find a new aligned address for the huge pages + uintptr_t start = 0; + uintptr_t end = 0; + uintptr_t expected; + do { + start = expected = mi_atomic_read_relaxed(&mi_huge_start); + if (start == 0) { + // Initialize the start address after the 32TiB area + start = ((uintptr_t)32 << 40); // 32TiB virtual start address + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + #endif + } + end = start + size; + mi_assert_internal(end % MI_SEGMENT_SIZE == 0); + } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); + + // And allocate + void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); + if (p == NULL) { + return NULL; + } _mi_stat_increase(&_mi_stats_main.committed, size); _mi_stat_increase(&_mi_stats_main.reserved, size); + if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned + _mi_warning_message("huge page area was not aligned\n"); + _mi_os_free(p,size,&_mi_stats_main); + return NULL; + } + + if (psize != NULL) *psize = size; return p; } From 723fbba2596e663b6dac40da5e486c0ac52501f3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:20 -0800 Subject: [PATCH 013/179] fix output during preloading enabling stderr only after the crt has loaded --- src/options.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index 32f13d54..3a7833a2 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // toupper #include +static void mi_add_stderr_output(); + int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } @@ -73,7 +75,9 @@ static mi_option_desc_t options[_mi_option_last] = static void mi_option_init(mi_option_desc_t* desc); void _mi_options_init(void) { - // called on process load + // called on process load; should not be called before the CRT is initialized! + // (e.g. do not call this from process_init as that may run before CRT initialization) + mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; mi_option_get(option); // initialize @@ -135,7 +139,7 @@ static void mi_out_stderr(const char* msg) { #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. - _cputs(msg); + if (!_mi_preloading()) { _cputs(msg); } #else fputs(msg, stderr); #endif @@ -166,23 +170,29 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { if (out==NULL) return; - // claim all (no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT); + // claim (if `no_more_buf == true`, no more output will be added after this point) + size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; out(out_buf); + if (!no_more_buf) { + out_buf[count] = '\n'; // if continue with the buffer, insert a newline + } } -// The initial default output, outputs to stderr and the delayed output buffer. + +// Once this module is loaded, switch to this routine +// which outputs to stderr and the delayed output buffer. static void mi_out_buf_stderr(const char* msg) { mi_out_stderr(msg); mi_out_buf(msg); } + // -------------------------------------------------------- // Default output handler // -------------------------------------------------------- @@ -194,14 +204,19 @@ static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* mi_out_get_default(void) { mi_output_fun* out = mi_out_default; - return (out == NULL ? &mi_out_buf_stderr : out); + return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now + if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now } +// add stderr to the delayed output after the module is loaded +static void mi_add_stderr_output() { + mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output +} // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. 
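A much simplified sketch of the delayed-output scheme above (illustrative names, no thread safety, unlike the real out_buf): messages produced while the allocator is still preloading are appended to a static buffer and flushed to stderr once initialization has progressed far enough to use the CRT.

#include <stdio.h>
#include <string.h>

static char   delayed_buf[4096];
static size_t delayed_len = 0;
static int    stderr_ready = 0;

static void out_msg(const char* msg) {
  if (stderr_ready) { fputs(msg, stderr); return; }
  size_t n = strlen(msg);
  if (delayed_len + n < sizeof(delayed_buf)) {   // silently drop what does not fit
    memcpy(delayed_buf + delayed_len, msg, n);
    delayed_len += n;
  }
}

static void enable_stderr_output(void) {   // call once the CRT is initialized
  stderr_ready = 1;
  delayed_buf[delayed_len] = 0;
  fputs(delayed_buf, stderr);              // flush everything buffered so far
}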
@@ -214,7 +229,7 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { - if (_mi_preloading() || recurse) return; + if (recurse) return; if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); recurse = true; if (prefix != NULL) out(prefix); @@ -228,7 +243,7 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; - if (_mi_preloading() || recurse) return; + if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; From e32048879183c2672db7d06138ca6f4eb80ebfa1 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 12:18:32 -0800 Subject: [PATCH 014/179] add numa nodes to stats --- include/mimalloc-internal.h | 2 +- src/os.c | 7 +++++-- src/stats.c | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index b4d3351d..c28cf0fd 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -57,7 +57,7 @@ void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocat void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); int _mi_os_numa_node(mi_os_tld_t* tld); - +int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); diff --git a/src/os.c b/src/os.c index e1dc31f8..af3c440c 100644 --- a/src/os.c +++ b/src/os.c @@ -840,7 +840,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } if (p == NULL) { - _mi_warning_message("failed to allocate huge OS pages (size %zu) (error %d)\n", size, GetLastError()); + DWORD winerr = GetLastError(); + _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? 
" (insufficient resources)" : "")); } return p; } @@ -981,12 +982,14 @@ static int mi_os_numa_node_countx(void) { int _mi_os_numa_node_count(void) { static int numa_node_count = 0; // cache the node count if (mi_unlikely(numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); + int ncount = mi_os_numa_node_countx(); + int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); } mi_assert_internal(numa_node_count >= 1); return numa_node_count; diff --git a/src/stats.c b/src/stats.c index 50bd029d..79362cc4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -265,7 +265,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_counter_print(&stats->commit_calls, "commits", out); mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); - + _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); double user_time; From f36ec5d9d8275777e05526468524dfd9d433164e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:16:07 -0800 Subject: [PATCH 015/179] reserve huge pages incrementally --- src/arena.c | 23 ++++++---- src/options.c | 1 - src/os.c | 120 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/src/arena.c b/src/arena.c index 56b09859..24fd2114 100644 --- a/src/arena.c +++ b/src/arena.c @@ -27,7 +27,10 @@ with on-demand coalescing. void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); + int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -234,12 +237,12 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); if (p != NULL) { mi_assert_internal(block_index != SIZE_MAX); -#if MI_DEBUG>=1 + #if MI_DEBUG>=1 _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; mi_block_info_t binfo = mi_atomic_read(block); mi_assert_internal(mi_block_is_in_use(binfo)); mi_assert_internal(mi_block_count(binfo) >= needed_bcount); -#endif + #endif *memid = mi_memid_create(arena_index, block_index); *commit = true; // TODO: support commit on demand? 
*large = arena->is_large; @@ -382,18 +385,22 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { - size_t hsize = 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, &hsize); - if (p==NULL) return ENOMEM; - _mi_verbose_message("reserved %zu huge (1GiB) pages\n", pages); + size_t hsize = 0; + size_t pages_reserved = 0; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + if (p==NULL || pages_reserved==0) { + _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); + return ENOMEM; + } + _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { - _mi_os_free(p, hsize, &_mi_stats_main); + _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; diff --git a/src/options.c b/src/options.c index 3a7833a2..11d12187 100644 --- a/src/options.c +++ b/src/options.c @@ -221,7 +221,6 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -#define MAX_ERROR_COUNT (10) static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation diff --git a/src/os.c b/src/os.c index af3c440c..5947333d 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % GiB) == 0) { + static bool mi_huge_pages_available = true; + if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else @@ -358,6 +359,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); @@ -799,11 +801,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) mi_win_enable_large_os_pages(); - void* p = NULL; #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if (pNtAllocateVirtualMemoryEx != NULL) { + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) #endif @@ -822,7 +824,8 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) return base; } else { - // fall back to regular huge pages + // fall back to 
regular large pages + mi_huge_pages_available = false; // don't try further huge pages _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); } } @@ -830,20 +833,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; params[0].ULong = (unsigned)numa_node; - p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); + return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - else #endif - // use regular virtual alloc on older windows - { - p = VirtualAlloc(addr, size, flags, PAGE_READWRITE); - } - - if (p == NULL) { - DWORD winerr = GetLastError(); - _mi_warning_message("failed to allocate huge OS pages (size %zu) (windows error %d%s)\n", size, winerr, (winerr==1450 ? " (insufficient resources)" : "")); - } - return p; + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) @@ -880,44 +874,92 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 -// Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) { - if (psize != NULL) *psize = 0; +// Claim an aligned address range for huge pages +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; const size_t size = pages * MI_HUGE_OS_PAGE_SIZE; - // Find a new aligned address for the huge pages uintptr_t start = 0; uintptr_t end = 0; uintptr_t expected; do { - start = expected = mi_atomic_read_relaxed(&mi_huge_start); + start = expected = mi_atomic_read_relaxed(&mi_huge_start); if (start == 0) { // Initialize the start address after the 32TiB area - start = ((uintptr_t)32 << 40); // 32TiB virtual start address - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages); + start = ((uintptr_t)32 << 40); // 32TiB virtual start address +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB - #endif +#endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); - // And allocate - void* p = mi_os_alloc_huge_os_pagesx((void*)start, size, numa_node); - if (p == NULL) { - return NULL; - } - _mi_stat_increase(&_mi_stats_main.committed, size); - _mi_stat_increase(&_mi_stats_main.reserved, size); - if ((uintptr_t)p % MI_SEGMENT_SIZE != 0) { // must be aligned - _mi_warning_message("huge page area was not aligned\n"); - _mi_os_free(p,size,&_mi_stats_main); - return NULL; - } + if (total_size != NULL) *total_size = size; + return (uint8_t*)start; +} + +// Allocate MI_SEGMENT_SIZE aligned huge pages +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { + if (psize != NULL) *psize = 0; + if (pages_reserved != NULL) *pages_reserved = 0; + size_t 
size = 0; + uint8_t* start = mi_os_claim_huge_pages(pages, &size); - if (psize != NULL) *psize = size; - return p; + // Allocate one page at the time but try to place them contiguously + // We allocate one page at the time to be able to abort if it takes too long + // or to at least allocate as many as available on the system. + double start_t = _mi_clock_start(); + size_t page; + for (page = 0; page < pages; page++) { + // allocate a page + bool is_large = true; + void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); + void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + + // Did we succeed at a contiguous address? + if (p != addr) { + // no success, issue a warning and break + if (p != NULL) { + _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + } + break; + } + + // success, record it + _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + + // check for timeout + double elapsed = _mi_clock_end(start_t); + if (page >= 1) { + double estimate = ((elapsed / (double)(page+1)) * (double)pages); + if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break + elapsed = max_secs + 1.0; + } + } + if (elapsed > max_secs) { + _mi_warning_message("huge page allocation timed out\n"); + break; + } + } + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + if (pages_reserved != NULL) *pages_reserved = page; + if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; + return (page == 0 ? NULL : start); +} + +// free every huge page in a range individually (as we allocated per page) +// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { + if (p==NULL || size==0) return; + uint8_t* base = (uint8_t*)p; + while (size >= MI_HUGE_OS_PAGE_SIZE) { + _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + size -= MI_HUGE_OS_PAGE_SIZE; + } } /* ---------------------------------------------------------------------------- From 520a8dafee0747e1da8b220b28b35298f10512b2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:28 -0800 Subject: [PATCH 016/179] divide huge pages more even --- src/arena.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 24fd2114..95a102d1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -385,6 +385,7 @@ static bool mi_arena_add(mi_arena_t* arena) { // reserve at a specific numa node int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { + if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; @@ -422,18 +423,20 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // pages per numa node int numa_count = _mi_os_numa_node_count(); if (numa_count <= 0) numa_count = 1; - size_t pages_per = pages / numa_count; - if (pages_per == 0) pages_per = 1; + const size_t pages_per = pages / numa_count; + const size_t pages_mod = pages % numa_count; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { - int err = mi_reserve_huge_os_pages_at((pages_per > pages ? 
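The time-out check above extrapolates from the average time per page reserved so far; a small sketch of that heuristic (hypothetical helper, not part of the mimalloc API):

#include <stddef.h>

static int should_stop_reserving(double elapsed_secs, size_t pages_done,
                                 size_t pages_total, double max_secs) {
  if (elapsed_secs > max_secs) return 1;              // already over budget
  if (pages_done >= 1) {
    double estimate = (elapsed_secs / (double)pages_done) * (double)pages_total;
    if (estimate > 1.5 * max_secs) return 1;          // projected to time out
  }
  return 0;
}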
pages : pages_per), numa_node); + size_t node_pages = pages_per; // can be 0 + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; - if (pages < pages_per) { + if (pages < node_pages) { pages = 0; } else { - pages -= pages_per; + pages -= node_pages; } } From d1d65fbca4d037c5b9cc0838074804fde1f505c7 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:25:41 -0800 Subject: [PATCH 017/179] make max error messages configurable --- include/mimalloc.h | 1 + src/options.c | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index b155aca6..c03ddc1e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -277,6 +277,7 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_os_tag, mi_option_max_numa_node, + mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 11d12187..63b1612a 100644 --- a/src/options.c +++ b/src/options.c @@ -14,6 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this + static void mi_add_stderr_output(); int mi_version(void) mi_attr_noexcept { @@ -69,7 +71,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) } // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -86,6 +89,7 @@ void _mi_options_init(void) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } } + mi_max_error_count = mi_option_get(mi_option_max_errors); } long mi_option_get(mi_option_t option) { @@ -275,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) { void _mi_error_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); @@ -285,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) { void _mi_warning_message(const char* fmt, ...) 
{ if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return; + if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); From 9d6a5acb228db9cd4ae8f50ef2295e9b5d57e3c8 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 3 Nov 2019 13:34:54 -0800 Subject: [PATCH 018/179] fix unix build warnings --- CMakeLists.txt | 5 +++-- src/arena.c | 2 +- src/heap.c | 2 +- src/os.c | 1 - src/page.c | 2 +- src/segment.c | 6 ++++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e96c237..12540f68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,5 @@ cmake_minimum_required(VERSION 3.0) project(libmimalloc C CXX) -include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(CMAKE_C_STANDARD 11) set(CMAKE_CXX_STANDARD 17) @@ -15,6 +13,9 @@ option(MI_SECURE "Use security mitigations (like guard pages and rand option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) +include("cmake/mimalloc-config-version.cmake") +include("CheckIncludeFile") + set(mi_install_dir "lib/mimalloc-${mi_version}") set(mi_sources diff --git a/src/arena.c b/src/arena.c index 95a102d1..08a36415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -429,7 +429,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; + if ((size_t)numa_node < pages_mod) node_pages++; int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); if (err) return err; if (pages < node_pages) { diff --git a/src/heap.c b/src/heap.c index 15c5d02a..162cf406 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>1 +#if MI_DEBUG>=3 static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); diff --git a/src/os.c b/src/os.c index 5947333d..3f299362 100644 --- a/src/os.c +++ b/src/os.c @@ -914,7 +914,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s size_t page; for (page = 0; page < pages; page++) { // allocate a page - bool is_large = true; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); diff --git a/src/page.c b/src/page.c index f7fad764..32b68edb 100644 --- a/src/page.c +++ b/src/page.c @@ -38,7 +38,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { size_t count = 0; while (head != NULL) { diff --git a/src/segment.c b/src/segment.c index dcc6a04b..178e0eda 100644 --- a/src/segment.c +++ b/src/segment.c @@ -41,7 +41,7 @@ terms of the MIT license. 
A copy of the license can be found in the file ----------------------------------------------------------- */ -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; @@ -111,7 +111,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG > 1) +#if (MI_DEBUG>=2) static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); @@ -120,7 +120,9 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } return in_queue; } +#endif +#if (MI_DEBUG>=3) static size_t mi_segment_pagesize(mi_segment_t* segment) { return ((size_t)1 << segment->page_shift); } From 8afd06b248f6a82763292821bf5096e35f6a5a0b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 08:44:40 -0800 Subject: [PATCH 019/179] use int64 for time (instead of double) --- include/mimalloc-internal.h | 7 ++- src/arena.c | 4 +- src/memory.c | 1 + src/os.c | 22 ++++++--- src/stats.c | 95 ++++++++++++++++++------------------- 5 files changed, 70 insertions(+), 59 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index c28cf0fd..413f76e6 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -106,8 +106,11 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -double _mi_clock_end(double start); -double _mi_clock_start(void); + +typedef int64_t mi_msecs_t; +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` diff --git a/src/arena.c b/src/arena.c index 08a36415..6faf7d3e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,7 +28,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar //int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); int _mi_os_numa_node_count(void); @@ -390,7 +390,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, (double)pages / 2.0, &pages_reserved, &hsize); + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; diff --git a/src/memory.c b/src/memory.c index a425393c..75a1df92 100644 --- a/src/memory.c +++ b/src/memory.c @@ -564,6 +564,7 @@ void _mi_mem_collect(mi_stats_t* stats) { } } + /* 
---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ diff --git a/src/os.c b/src/os.c index 3f299362..44ef9830 100644 --- a/src/os.c +++ b/src/os.c @@ -871,6 +871,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -899,18 +900,25 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (total_size != NULL) *total_size = size; return (uint8_t*)start; } +#else +static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { + if (total_size != NULL) *total_size = 0; + return NULL; +} +#endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); + if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. - double start_t = _mi_clock_start(); + mi_msecs_t start_t = _mi_clock_start(); size_t page; for (page = 0; page < pages; page++) { // allocate a page @@ -932,14 +940,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, double max_secs, s _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - double elapsed = _mi_clock_end(start_t); + mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - double estimate = ((elapsed / (double)(page+1)) * (double)pages); - if (estimate > 1.5*max_secs) { // seems like we are going to timeout, break - elapsed = max_secs + 1.0; + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; } } - if (elapsed > max_secs) { + if (elapsed > max_msecs) { _mi_warning_message("huge page allocation timed out\n"); break; } diff --git a/src/stats.c b/src/stats.c index 79362cc4..a1248043 100644 --- a/src/stats.c +++ b/src/stats.c @@ -231,9 +231,9 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { mi_print_header(out); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; @@ -266,16 +266,16 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_counter_print_avg(&stats->searches, "searches", out); _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (secs >= 0.0) 
_mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); + if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - double user_time; - double sys_time; + mi_msecs_t user_time; + mi_msecs_t sys_time; size_t peak_rss; size_t page_faults; size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim ); + _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); if (peak_commit > 0) { _mi_fprintf(out,", commit charge: "); @@ -284,9 +284,7 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) _mi_fprintf(out,"\n"); } -double _mi_clock_end(double start); -double _mi_clock_start(void); -static double mi_time_start = 0.0; +static mi_msecs_t mi_time_start; // = 0 static mi_stats_t* mi_stats_get_default(void) { mi_heap_t* heap = mi_heap_get_default(); @@ -316,71 +314,72 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } -static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { +static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, secs, out); + _mi_stats_print(&_mi_stats_main, elapsed, out); } void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_print_ex(mi_stats_get_default(),elapsed,out); } void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + _mi_stats_print(mi_stats_get_default(), elapsed, out); } - -// -------------------------------------------------------- -// Basic timer for convenience -// -------------------------------------------------------- - +// ---------------------------------------------------------------- +// Basic timer for convenience; use milli-seconds to avoid doubles +// ---------------------------------------------------------------- #ifdef _WIN32 #include -static double mi_to_seconds(LARGE_INTEGER t) { - static double freq = 0.0; - if (freq <= 0.0) { +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { LARGE_INTEGER f; QueryPerformanceFrequency(&f); - freq = (double)(f.QuadPart); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; } - return ((double)(t.QuadPart) / freq); + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); } -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { LARGE_INTEGER t; QueryPerformanceCounter(&t); - return mi_to_seconds(t); + return mi_to_msecs(t); } #else #include #ifdef CLOCK_REALTIME -static double mi_clock_now(void) { +mi_msecs_t _mi_clock_now(void) { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec); + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); } #else // low 
resolution timer -static double mi_clock_now(void) { - return ((double)clock() / (double)CLOCKS_PER_SEC); +mi_msecs_t _mi_clock_now(void) { + return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); } #endif #endif -static double mi_clock_diff = 0.0; +static mi_msecs_t mi_clock_diff; -double _mi_clock_start(void) { +mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { - double t0 = mi_clock_now(); - mi_clock_diff = mi_clock_now() - t0; + mi_msecs_t t0 = _mi_clock_now(); + mi_clock_diff = _mi_clock_now() - t0; } - return mi_clock_now(); + return _mi_clock_now(); } -double _mi_clock_end(double start) { - double end = mi_clock_now(); +mi_msecs_t _mi_clock_end(mi_msecs_t start) { + mi_msecs_t end = _mi_clock_now(); return (end - start - mi_clock_diff); } @@ -394,21 +393,21 @@ double _mi_clock_end(double start) { #include #pragma comment(lib,"psapi.lib") -static double filetime_secs(const FILETIME* ftime) { +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { ULARGE_INTEGER i; i.LowPart = ftime->dwLowDateTime; i.HighPart = ftime->dwHighDateTime; - double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds - return secs; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { FILETIME ct; FILETIME ut; FILETIME st; FILETIME et; GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_secs(&ut); - *stime = filetime_secs(&st); + *utime = filetime_msecs(&ut); + *stime = filetime_msecs(&st); PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); @@ -427,11 +426,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #include #endif -static double timeval_secs(const struct timeval* tv) { - return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6); +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); #if defined(__APPLE__) && defined(__MACH__) @@ -452,12 +451,12 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size #pragma message("define a way to get process info") #endif -static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { *peak_rss = 0; *page_faults = 0; *page_reclaim = 0; *peak_commit = 0; - *utime = 0.0; - *stime = 0.0; + *utime = 0; + *stime = 0; } #endif From 3d0a1e249fa113e93792838a00a7acd9fc98aa34 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 09:40:10 -0800 Subject: [PATCH 020/179] remove all floating point types and arithmetic --- src/arena.c | 1 - src/init.c | 3 +-- src/stats.c | 32 +++++++++++++++++++------------- 3 
files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6faf7d3e..e58d2c47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -25,7 +25,6 @@ with on-demand coalescing. // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept; void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); diff --git a/src/init.c b/src/init.c index ed15aeba..ef848de4 100644 --- a/src/init.c +++ b/src/init.c @@ -433,8 +433,7 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - // double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); mi_reserve_huge_os_pages_interleave(pages); } } diff --git a/src/stats.c b/src/stats.c index a1248043..011fab64 100644 --- a/src/stats.c +++ b/src/stats.c @@ -130,19 +130,23 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); - double base = (unit == 0 ? 1000.0 : 1024.0); + const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; - double pos = (double)(n < 0 ? -n : n); - if (pos < base) - snprintf(buf,len, "%d %s ", (int)n, suffix); - else if (pos < base*base) - snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); - else if (pos < base*base*base) - snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); - else - snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); - + const int64_t pos = (n < 0 ? -n : n); + if (pos < base) { + snprintf(buf, len, "%d %s ", (int)n, suffix); + } + else { + int64_t divider = base; + const char* magnitude = "k"; + if (pos >= divider*base) { divider *= base; magnitude = "m"; } + if (pos >= divider*base) { divider *= base; magnitude = "g"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); + } _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); } @@ -199,8 +203,10 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg } static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); + const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); + const long avg_whole = (long)(avg_tens/10); + const long avg_frac1 = (long)(avg_tens%10); + _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); } From 829fd872f407c5e201cd844b8f26f2c87915e89b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 4 Nov 2019 11:48:41 -0800 Subject: [PATCH 021/179] initial delay slots --- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 26 +++++-- include/mimalloc.h | 1 + src/heap.c | 2 +- src/init.c | 4 +- src/memory.c | 143 +++++++++++++++++++++++++++++++----- src/options.c | 1 + src/segment.c | 31 ++++---- src/stats.c | 2 +- 9 files changed, 171 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..25a3d93d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -61,15 +61,15 @@ int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); bool _mi_mem_protect(void* addr, size_t size); bool _mi_mem_unprotect(void* addr, size_t size); -void _mi_mem_collect(mi_stats_t* stats); +void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); @@ -107,7 +107,6 @@ uintptr_t _mi_heap_random(mi_heap_t* heap); // "stats.c" void _mi_stats_done(mi_stats_t* stats); -typedef int64_t mi_msecs_t; mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..8a3ffff4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -385,6 +385,19 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +// ------------------------------------------------------ +// Delay slots (to avoid expensive OS calls) +// ------------------------------------------------------ +typedef int64_t mi_msecs_t; + +typedef struct mi_delay_slot_s { + mi_msecs_t expire; + uint8_t* addr; + size_t size; +} mi_delay_slot_t; + +#define MI_RESET_DELAY_SLOTS (128) + // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ @@ -395,6 +408,12 @@ typedef struct mi_segment_queue_s { mi_segment_t* last; } mi_segment_queue_t; +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats + mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; +} mi_os_tld_t; // Segments thread local data typedef struct mi_segments_tld_s { @@ -408,14 +427,9 @@ typedef struct mi_segments_tld_s { size_t cache_size; // total size of all segments in the cache mi_segment_t* cache; // (small) 
cache of segments mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..e6fa9c2b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,6 +275,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_reset_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/heap.c b/src/heap.c index 162cf406..d03925d5 100644 --- a/src/heap.c +++ b/src/heap.c @@ -149,7 +149,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect regions if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + _mi_mem_collect(&heap->tld->os); } } diff --git a/src/init.c b/src/init.c index ef848de4..971a93c0 100644 --- a/src/init.c +++ b/src/init.c @@ -94,11 +94,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) +#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments + { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -218,6 +219,7 @@ static bool _mi_heap_init(void) { memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; tld->os.stats = &tld->stats; _mi_heap_default = heap; } diff --git a/src/memory.c b/src/memory.c index 75a1df92..e12405c1 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,6 +53,9 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +// local +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); + // Constants #if (MI_INTPTR_SIZE==8) @@ -470,16 +473,19 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
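/* ----------------------------------------------------------------------------
   Editor's sketch (not part of the patch): the delay-slot idea in miniature.
   A small fixed array of {expire, addr, size} entries postpones an expensive
   OS call (a madvise-style reset) for a while; if the same range is freed or
   committed again before the entry expires, the pending call is simply
   cancelled. All names below (slot_t, now_msecs, delay_insert, delay_remove)
   are local stand-ins, not mimalloc's own API, and the coalescing of
   overlapping ranges done by mi_delay_insert is omitted here.
-----------------------------------------------------------------------------*/
#include <stdint.h>
#include <stddef.h>
#include <time.h>

typedef struct slot_s { int64_t expire; uint8_t* addr; size_t size; } slot_t;
typedef void (reset_fun)(uint8_t* addr, size_t size);

static int64_t now_msecs(void) {           // coarse stand-in for _mi_clock_now
  return (int64_t)clock() * 1000 / CLOCKS_PER_SEC;
}

// Defer `resolve(addr,size)` by `delay_ms`; expired or evicted entries are
// resolved on the spot so no deferred work is ever lost.
static void delay_insert(slot_t* slots, size_t n, int64_t delay_ms,
                         uint8_t* addr, size_t size, reset_fun* resolve) {
  const int64_t now = now_msecs();
  slot_t* oldest = &slots[0];
  for (size_t i = 0; i < n; i++) {
    if (slots[i].expire == 0) { oldest = &slots[i]; break; }    // empty slot
    if (slots[i].expire < now) {                                // expired: resolve now
      resolve(slots[i].addr, slots[i].size);
      slots[i].expire = 0; oldest = &slots[i]; break;
    }
    if (slots[i].expire < oldest->expire) oldest = &slots[i];   // remember the oldest
  }
  if (oldest->expire != 0) resolve(oldest->addr, oldest->size); // evict if still pending
  oldest->expire = now + delay_ms; oldest->addr = addr; oldest->size = size;
}

// Cancel any pending entry that overlaps a range that is being freed or
// committed again (the real mi_delay_remove distinguishes containment cases).
static void delay_remove(slot_t* slots, size_t n, uint8_t* addr, size_t size) {
  for (size_t i = 0; i < n; i++) {
    uint8_t* s = slots[i].addr;
    if (slots[i].expire != 0 && s < addr + size && addr < s + slots[i].size) {
      slots[i].expire = 0;
    }
  }
}
/* In the patch itself this is what `_mi_mem_reset` does: it calls
   `mi_delay_insert` with `mi_option_reset_delay` (500 ms by default) and
   `_mi_os_reset` as the resolve function, while `_mi_mem_free`,
   `_mi_mem_commit` and `_mi_mem_unreset` call `mi_delay_remove` to cancel
   pending resets for ranges they touch. */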
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); +void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { + mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; + + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + size_t arena_memid = 0; size_t idx = 0; size_t bitidx = 0; if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, stats); + _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region @@ -512,14 +518,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments { - _mi_os_reset(p, size, stats); + _mi_os_reset(p, size, tld->stats); // cannot use delay reset! (due to concurrent allocation in the same region) //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? } } } if (!is_eager_committed) { // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); + _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); } // TODO: should we free empty regions? currently only done _mi_mem_collect. @@ -539,7 +545,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- collection -----------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { +void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. for (size_t i = 0; i < regions_count; i++) { mem_region_t* region = ®ions[i]; @@ -554,7 +560,8 @@ void _mi_mem_collect(mi_stats_t* stats) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release mi_atomic_write(®ion->info,0); @@ -564,25 +571,123 @@ void _mi_mem_collect(mi_stats_t* stats) { } } +/* ---------------------------------------------------------------------------- + Delay slots +-----------------------------------------------------------------------------*/ + +typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); + +static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, + mi_msecs_t delay, uint8_t* addr, size_t size, + mi_delay_resolve_fun* resolve, void* arg) +{ + if (delay==0) { + resolve(addr, size, arg); + return; + } + + mi_msecs_t now = _mi_clock_now(); + mi_delay_slot_t* oldest = slots; + // walk through all slots, resolving expired ones. + // remember the oldest slot to insert the new entry in. + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + + if (slot->expire == 0) { + // empty slot + oldest = slot; + } + // TODO: should we handle overlapping areas too? 
+ else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses new area, increase expiration + slot->expire = now + delay; + delay = 0; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, overwrite + slot->expire = now + delay; + slot->addr = addr; + slot->size = size; + delay = 0; + } + else if (slot->expire < now) { + // expired slot, resolve now + slot->expire = 0; + resolve(slot->addr, slot->size, arg); + } + else if (oldest->expire > slot->expire) { + oldest = slot; + } + } + if (delay>0) { + // not yet registered, use the oldest slot + if (oldest->expire > 0) { + resolve(oldest->addr, oldest->size, arg); // evict if not empty + } + oldest->expire = now + delay; + oldest->addr = addr; + oldest->size = size; + } +} + +static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +{ + uint8_t* addr = (uint8_t*)p; + bool done = false; + // walk through all slots + for (size_t i = 0; i < count; i++) { + mi_delay_slot_t* slot = &slots[i]; + if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { + // earlier slot encompasses the area; remove it + slot->expire = 0; + done = true; + } + else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { + // new one encompasses old slot, remove it + slot->expire = 0; + } + else if ((addr <= slot->addr && addr + size > slot->addr) || + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap, remove slot + mi_assert_internal(false); + slot->expire = 0; + } + } + return done; +} + +static void mi_resolve_reset(void* p, size_t size, void* vtld) { + mi_os_tld_t* tld = (mi_os_tld_t*)vtld; + _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + (uint8_t*)p, size, &mi_resolve_reset, tld); + return true; +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + return _mi_os_unreset(p, size, is_zero, tld->stats); + } + return true; +} + + /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); +bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_commit(p, size, is_zero, tld->stats); } -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); +bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { + mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + return _mi_os_decommit(p, size, tld->stats); } bool _mi_mem_protect(void* p, size_t size) { diff --git a/src/options.c b/src/options.c index 63b1612a..e098af0b 100644 --- a/src/options.c +++ b/src/options.c @@ -70,6 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) 
}, // note: cannot enable this if secure is on { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 178e0eda..b9abe2b3 100644 --- a/src/segment.c +++ b/src/segment.c @@ -234,7 +234,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + _mi_mem_free(segment, segment_size, segment->memid, tld->os); } @@ -281,7 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); + _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->os); } segment->next = tld->cache; tld->cache = segment; @@ -346,13 +346,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); + _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); segment->mem_is_committed = true; } if (!segment->mem_is_fixed && (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); + _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); if (reset_zero) is_zero = true; } } @@ -365,7 +365,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + _mi_mem_commit(segment, info_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -459,7 +459,7 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment)); for (size_t i = 0; i < segment->capacity; i++) { @@ -472,14 +472,14 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) mi_assert_internal(!segment->mem_is_fixed); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,stats); + _mi_mem_commit(start,psize,&is_zero,tld->os); if (is_zero) page->is_zero_init = true; } if (page->is_reset) { mi_assert_internal(!segment->mem_is_fixed); 
page->is_reset = false; bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, stats); + _mi_mem_unreset(start, psize, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } } @@ -497,21 +497,20 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; - _mi_mem_reset(start, psize, stats); + _mi_mem_reset(start, psize, tld->os); } // zero the page data, but not the segment fields @@ -529,7 +528,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert_expensive(mi_segment_is_valid(segment)); // mark it as free now - mi_segment_page_clear(segment, page, tld->stats); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -634,7 +633,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen _mi_stat_decrease(&tld->stats->pages_abandoned, 1); if (mi_page_all_free(page)) { // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); + mi_segment_page_clear(segment,page,tld); } else { // otherwise reclaim it @@ -666,7 +665,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen // Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld->stats); + mi_page_t* page = mi_segment_find_free(segment, tld); page->segment_in_use = true; segment->used++; mi_assert_internal(segment->used <= segment->capacity); diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From 288726606390edb4ffb9664b9bce0271516b550d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 14:17:36 -0800 Subject: [PATCH 022/179] optimize get numa node for single node systems --- src/os.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 44ef9830..254f85f1 100644 --- a/src/os.c +++ b/src/os.c @@ -1046,9 +1046,10 @@ int _mi_os_numa_node_count(void) { int _mi_os_numa_node(mi_os_tld_t* tld) { UNUSED(tld); - int numa_node = mi_os_numa_nodex(); - // never more than the node count and >= 0 int numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } if (numa_node < 0) numa_node = 0; return numa_node; From 00e19cad9abd225bb4c0975c4f9b6e440a81b97c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 21:37:23 -0800 Subject: [PATCH 023/179] refactor region code, split out atomic bitmap --- ide/vs2019/mimalloc-override.vcxproj | 2 +- ide/vs2019/mimalloc.vcxproj | 3 +- include/mimalloc-atomic.h | 31 ++- src/bitmap.inc.c | 160 +++++++++++++ src/memory.c | 339 ++++++++++----------------- 5 files changed, 318 insertions(+), 217 deletions(-) create mode 100644 src/bitmap.inc.c diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 09fd37fb..e1c7535c 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -123,7 +123,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 1fabff5e..19696c10 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -116,7 +116,7 @@ true true ../../include - MI_DEBUG=1;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false stdcpp17 @@ -218,6 +218,7 @@ + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index dff0f011..c18f990f 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); + + // Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. 
// (Note: expected and desired are in opposite order from atomic_compare_exchange) @@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; -#define RC64(f) f##64 +#define MI_64(f) f##64 #else typedef LONG msc_intptr_t; -#define RC64(f) f +#define MI_64(f) f #endif static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); + return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +} +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); + return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); } static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { return mi_atomic_cas_strong(p,desired,expected); } static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { - return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; @@ -177,6 +190,14 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add MI_USING_STD return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_and_explicit(p, x, memory_order_relaxed); +} +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + MI_USING_STD + return atomic_fetch_or_explicit(p, x, memory_order_relaxed); +} static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { MI_USING_STD return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed); diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c new file mode 100644 index 00000000..5bea4748 --- /dev/null +++ b/src/bitmap.inc.c @@ -0,0 +1,160 @@ +#pragma once +#ifndef MI_BITMAP_H +#define MI_BITMAP_H + +#include "mimalloc.h" +#include "mimalloc-internal.h" + +// Use bit scan forward to quickly find the first zero bit if it is available +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll 
+#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + + +#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) +#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set + +// An atomic bitmap of `uintptr_t` fields +typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; +typedef mi_bitmap_field_t* mi_bitmap_t; + +// A bitmap index is the index of the bit in a bitmap. +typedef size_t mi_bitmap_index_t; + +// Create a bit index. +static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { + mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); + return (idx*MI_BITMAP_FIELD_BITS) + bitidx; +} + +// Get the field index from a bit index. +static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx / MI_BITMAP_FIELD_BITS); +} + +// Get the bit index in a bitmap field +static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { + return (bitmap_idx % MI_BITMAP_FIELD_BITS); +} + +// The bit mask for a given number of blocks at a specified bit index. +static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + return ((((uintptr_t)1 << count) - 1) << bitidx); +} + +// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`. +// Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + volatile _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_read(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const uintptr_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#ifdef MI_HAVE_BITSCAN + size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + if ((map & m) == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + uintptr_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going + map = mi_atomic_read(field); + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_BITSCAN + size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
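/* ----------------------------------------------------------------------------
   Editor's worked example (not part of the patch): the mask arithmetic behind
   mi_bitmap_mask_ and the claim/unclaim operations above, restated as a tiny
   self-contained check. `field` stands in for one bitmap field; the real code
   updates it with an atomic CAS (try_claim) or atomic and/or (unclaim/claim).
-----------------------------------------------------------------------------*/
#include <stdint.h>
#include <assert.h>

static void bitmap_mask_example(void) {
  // claim count=3 blocks starting at bit index 5:
  const uintptr_t mask  = ((((uintptr_t)1 << 3) - 1) << 5);   // 0xE0: bits 5..7
  uintptr_t       field = 0x0F;                               // bits 0..3 already in use
  assert((field & mask) == 0);                                // the range is free
  field |= mask;                                              // claim it
  assert(field == 0xEF);
  field &= ~mask;                                             // unclaim it again
  assert(field == 0x0F);
}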
+static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { + for (size_t idx = 0; idx < bitmap_fields; idx++) { + if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal((bitmap[idx] & mask) == mask); + mi_atomic_and(&bitmap[idx], ~mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + return ((prev & mask) == 0); +} + +#endif \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index 75a1df92..29e0e412 100644 --- a/src/memory.c +++ b/src/memory.c @@ -37,6 +37,8 @@ Possible issues: #include // memset +#include "bitmap.inc.c" + // Internal raw OS interface size_t _mi_os_large_page_size(); bool _mi_os_protect(void* addr, size_t size); @@ -56,22 +58,22 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map #elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map +#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) -#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) -#define MI_REGION_MAP_FULL UINTPTR_MAX +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB +#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +// Region info is a pointer to the memory region and two bits for +// its flags: is_large, and is_committed. typedef uintptr_t mi_region_info_t; static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { @@ -88,19 +90,22 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
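/* ----------------------------------------------------------------------------
   Editor's sketch (not part of the patch): the sizing implied by the region
   constants above on a 64-bit build, written as self-contained arithmetic.
   The lowercase names are local stand-ins for MI_SEGMENT_SIZE,
   MI_BITMAP_FIELD_BITS, MI_REGION_SIZE and MI_REGION_MAX.
-----------------------------------------------------------------------------*/
#include <stdint.h>
#include <assert.h>

static void region_sizing_example(void) {
  const uintptr_t segment_size      = (uintptr_t)4 << 20;               // 4 MiB per block
  const uintptr_t bitmap_field_bits = 8 * sizeof(uintptr_t);            // 64 bits per field
  const uintptr_t region_size       = segment_size * bitmap_field_bits;
  const uintptr_t heap_region_max   = (uintptr_t)256 << 30;             // 256 GiB managed by regions
  const uintptr_t region_max        = heap_region_max / region_size;

  assert(region_size == ((uintptr_t)256 << 20));         // 64 blocks of 4 MiB = 256 MiB per region
  assert(region_max  == 1024);                           // 256 GiB / 256 MiB = 1024 regions
  assert(region_max * sizeof(uintptr_t) == 8 * 1024);    // regions_map and regions_dirty: 8 KiB each
}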
typedef struct mem_region_s { - volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block - volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags - volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd + volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - size_t arena_memid; // if allocated from a (huge page) arena + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; - // The region map; 16KiB for a 256GiB HEAP_REGION_MAX -// TODO: in the future, maintain a map per NUMA node for numa aware allocation static mem_region_t regions[MI_REGION_MAX]; -static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions +// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. +static mi_bitmap_field_t regions_map[MI_REGION_MAX]; + +// A bit mask per region to track which blocks are dirty (= potentially written to) +static mi_bitmap_field_t regions_dirty[MI_REGION_MAX]; + +// Allocated regions +static volatile _Atomic(uintptr_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- @@ -113,12 +118,6 @@ static size_t mi_region_block_count(size_t size) { return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; } -// The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { - mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); - return ((((uintptr_t)1 << blocks) - 1) << bitidx); -} - // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; @@ -137,8 +136,8 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } -static size_t mi_memid_create(size_t idx, size_t bitidx) { - return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1; +static size_t mi_memid_create(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { @@ -149,78 +148,57 @@ static bool mi_memid_is_arena(size_t id) { return ((id&1)==1); } -static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) { +static bool mi_memid_indices(size_t id, mi_bitmap_index_t* bitmap_idx, size_t* arena_memid) { if (mi_memid_is_arena(id)) { *arena_memid = (id>>1); return true; } else { - *idx = ((id>>1) / MI_REGION_MAP_BITS); - *bitidx = ((id>>1) % MI_REGION_MAP_BITS); + *bitmap_idx = (mi_bitmap_index_t)(id>>1); return false; } } /* ---------------------------------------------------------------------------- -Commit from a region + Ensure a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). 
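/* ----------------------------------------------------------------------------
   Editor's sketch (not part of the patch): the memory-id encoding used by
   mi_memid_create / mi_memid_indices above. Bit 0 tags where the memory came
   from (1 = arena, 0 = region bitmap); the remaining bits carry the arena
   memid or the bitmap index. The arena encoding `(arena_id << 1) | 1` is
   inferred from mi_memid_is_arena and the `id >> 1` decode, since the body of
   mi_memid_create_from_arena is not shown in this hunk. Names are stand-ins.
-----------------------------------------------------------------------------*/
#include <stddef.h>
#include <assert.h>

static size_t memid_from_region(size_t bitmap_idx) { return (bitmap_idx << 1); }     // even id => region block
static size_t memid_from_arena (size_t arena_id)   { return (arena_id << 1) | 1; }   // odd id  => arena allocation
static int    memid_is_arena   (size_t id)         { return ((id & 1) == 1); }

static void memid_example(void) {
  size_t id = memid_from_region(70);            // bitmap index 70 = field 1, bit 6 (64-bit fields)
  assert(!memid_is_arena(id) && (id >> 1) == 70);
  id = memid_from_arena(5);
  assert(memid_is_arena(id) && (id >> 1) == 5);
}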
-static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, - size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) +static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_info_t* pinfo, mi_os_tld_t* tld) { - size_t mask = mi_region_block_mask(blocks,bitidx); - mi_assert_internal(mask != 0); - mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask); - mi_assert_internal(&regions[idx] == region); - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(&region->info); - if (info == 0) + mi_region_info_t info = mi_atomic_read(&regions[idx].info); + if (mi_unlikely(info == 0)) { bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = *allow_large; + bool region_large = allow_large; + bool is_zero = false; size_t arena_memid = 0; - void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, is_zero, &arena_memid, tld); - /* - void* start = NULL; - if (region_large) { - start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); - if (start != NULL) { region_commit = true; } - } - if (start == NULL) { - start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld); - } - */ - mi_assert_internal(!(region_large && !*allow_large)); + void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); + mi_assert_internal(!(region_large && !allow_large)); if (start == NULL) { - // failure to allocate from the OS! unclaim the blocks and fail - size_t map; - do { - map = mi_atomic_read_relaxed(&region->map); - } while (!mi_atomic_cas_weak(&region->map, map & ~mask, map)); + // failure to allocate from the OS! fail + *pinfo = 0; return false; } // set the newly allocated region - info = mi_region_info_create(start,region_large,region_commit); - if (mi_atomic_cas_strong(&region->info, info, 0)) { + info = mi_region_info_create(start, region_large, region_commit); + if (mi_atomic_cas_strong(&regions[idx].info, info, 0)) { // update the region count - region->arena_memid = arena_memid; - mi_atomic_write(&region->numa_node, _mi_os_numa_node(tld) + 1); + regions[idx].arena_memid = arena_memid; + mi_atomic_write(&regions[idx].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_write(&regions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); mi_atomic_increment(&regions_count); } else { // failed, another thread allocated just before us! // we assign it to a later slot instead (up to 4 tries). - for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { + for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) { regions[idx+i].arena_memid = arena_memid; mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_write(&regions_dirty[idx], is_zero ? 
0 : ~((uintptr_t)0)); mi_atomic_increment(&regions_count); start = NULL; break; @@ -232,27 +210,33 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); } // and continue with the memory at our index - info = mi_atomic_read(&region->info); + info = mi_atomic_read(&regions[idx].info); } } - mi_assert_internal(info == mi_atomic_read(&region->info)); + mi_assert_internal(info == mi_atomic_read(&regions[idx].info)); mi_assert_internal(info != 0); + *pinfo = info; + return true; +} + + +/* ---------------------------------------------------------------------------- + Commit blocks +-----------------------------------------------------------------------------*/ + +static void* mi_region_commit_blocks(mi_bitmap_index_t bitmap_idx, mi_region_info_t info, size_t blocks, size_t size, bool* commit, bool* is_large, bool* is_zero, mi_os_tld_t* tld) +{ + // set dirty bits + *is_zero = mi_bitmap_claim(regions_dirty, MI_REGION_MAX, blocks, bitmap_idx); // Commit the blocks to memory bool region_is_committed = false; bool region_is_large = false; - void* start = mi_region_info_read(info,&region_is_large,&region_is_committed); - mi_assert_internal(!(region_is_large && !*allow_large)); + void* start = mi_region_info_read(info, &region_is_large, &region_is_committed); + mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start!=NULL); - // set dirty bits - uintptr_t m; - do { - m = mi_atomic_read(&region->dirty_mask); - } while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m)); - *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? - - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + void* blocks_start = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bitmap_idx) * MI_SEGMENT_SIZE); if (*commit && !region_is_committed) { // ensure commit bool commit_zero = false; @@ -266,99 +250,58 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit // and return the allocation mi_assert_internal(blocks_start != NULL); - *allow_large = region_is_large; - *p = blocks_start; - *id = mi_memid_create(idx, bitidx); + *is_large = region_is_large; + return blocks_start; +} + +/* ---------------------------------------------------------------------------- + Claim and allocate blocks in a region +-----------------------------------------------------------------------------*/ + +static bool mi_region_alloc_blocks( + size_t idx, size_t blocks, size_t size, + bool* commit, bool* allow_large, bool* is_zero, + void** p, size_t* id, mi_os_tld_t* tld) +{ + mi_bitmap_index_t bitmap_idx; + if (!mi_bitmap_try_claim_field(regions_map, idx, blocks, &bitmap_idx)) { + return true; // no error, but also no success + } + mi_region_info_t info; + if (!mi_region_ensure_allocated(idx,*allow_large,&info,tld)) { + // failed to allocate region memory, unclaim the bits and fail + mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); + return false; + } + *p = mi_region_commit_blocks(bitmap_idx,info,blocks,size,commit,allow_large,is_zero,tld); + *id = mi_memid_create(bitmap_idx); return true; } -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanForward64(&idx, x); - #else - _BitScanForward(&idx, x); - #endif - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if 
(x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#endif -// Allocate `blocks` in a `region` at `idx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(p != NULL && id != NULL); - mi_assert_internal(blocks < MI_REGION_MAP_BITS); +/* ---------------------------------------------------------------------------- + Try to allocate blocks in suitable regions +-----------------------------------------------------------------------------*/ - const uintptr_t mask = mi_region_block_mask(blocks, 0); - const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - uintptr_t map = mi_atomic_read(&region->map); - if (map==MI_REGION_MAP_FULL) return true; - - #ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible - #else - size_t bitidx = 0; // otherwise start at 0 - #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while(bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(&region->map, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(&region->map); - continue; - } - else { - // success, we claimed the bits - // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, - size, commit, allow_large, is_zero, p, id, tld); - } - } - else { - // on to the next bit range - #ifdef MI_HAVE_BITSCAN - size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= blocks); - #else - size_t shift = 1; - #endif - bitidx += shift; - m <<= shift; - } +static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool allow_large ) { + uintptr_t m = mi_atomic_read_relaxed(&regions_map[idx]); + if (m == MI_BITMAP_FIELD_FULL) return false; + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = ((int)mi_atomic_read_relaxed(&regions->numa_node)) - 1; + if (rnode != numa_node) return false; + } + if (mi_unlikely(!(commit || allow_large))) { + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. 
+ mi_region_info_t info = mi_atomic_read_relaxed(&regions->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); + bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? + if (!ok) return false; } - // no error, but also no bits found return true; } @@ -366,33 +309,15 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc // Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written // if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. // (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size, +static bool mi_region_try_alloc_blocks( + int numa_node, size_t idx, size_t blocks, size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) { // check if there are available blocks in the region.. mi_assert_internal(idx < MI_REGION_MAX); - mem_region_t* region = &regions[idx]; - uintptr_t m = mi_atomic_read_relaxed(&region->map); - int rnode = ((int)mi_atomic_read_relaxed(&region->numa_node)) - 1; - if ((rnode < 0 || rnode == numa_node) && // fits current numa node - (m != MI_REGION_MAP_FULL)) // and some bits are zero - { - bool ok = (*commit || *allow_large); // committing or allow-large is always ok - if (!ok) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(&region->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? - } - if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } + if (mi_region_is_suitable(numa_node, idx, *commit, *allow_large)) { + return mi_region_alloc_blocks(idx, blocks, size, commit, allow_large, is_zero, p, id, tld); } return true; // no error, but no success either } @@ -426,14 +351,14 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l size = _mi_align_up(size, _mi_os_page_size()); // calculate the number of needed blocks - size_t blocks = mi_region_block_count(size); + const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); // find a range of free blocks - int numa_node = _mi_os_numa_node(tld); + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); void* p = NULL; - size_t count = mi_atomic_read(&regions_count); - size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? + const size_t count = mi_atomic_read(&regions_count); + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error @@ -456,7 +381,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l *id = mi_memid_create_from_arena(arena_memid); } else { - tld->region_idx = idx; // next start of search? currently not used as we use first-fit + tld->region_idx = idx; // next start of search } mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); @@ -475,9 +400,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { if (p==NULL) return; if (size==0) return; size_t arena_memid = 0; - size_t idx = 0; - size_t bitidx = 0; - if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) { + mi_bitmap_index_t bitmap_idx; + if (mi_memid_indices(id,&bitmap_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, stats); } @@ -487,11 +411,11 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset size = _mi_align_up(size, _mi_os_page_size()); - size_t blocks = mi_region_block_count(size); - size_t mask = mi_region_block_mask(blocks, bitidx); + const size_t blocks = mi_region_block_count(size); + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? mem_region_t* region = &regions[idx]; - mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed? mi_region_info_t info = mi_atomic_read(&region->info); bool is_large; bool is_eager_committed; @@ -499,8 +423,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); - if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? + mi_assert_internal(bitidx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bitidx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? // decommit (or reset) the blocks to reduce the working set. // TODO: implement delayed decommit/reset as these calls are too expensive @@ -526,12 +450,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // this frees up virtual address space which might be useful on 32-bit systems? // and unclaim - uintptr_t map; - uintptr_t newmap; - do { - map = mi_atomic_read_relaxed(&region->map); - newmap = map & ~mask; - } while (!mi_atomic_cas_weak(&region->map, newmap, map)); + mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); } } @@ -542,23 +461,23 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { void _mi_mem_collect(mi_stats_t* stats) { // free every region that has no segments in use. 
for (size_t i = 0; i < regions_count; i++) { - mem_region_t* region = &regions[i]; - if (mi_atomic_read_relaxed(&region->map) == 0) { + if (mi_atomic_read_relaxed(&regions_map[i]) == 0) { // if no segments used, try to claim the whole region uintptr_t m; do { - m = mi_atomic_read_relaxed(&region->map); - } while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 )); + m = mi_atomic_read_relaxed(&regions_map[i]); + } while(m == 0 && !mi_atomic_cas_weak(&regions_map[i], MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed); + void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, stats); + _mi_arena_free(start, MI_REGION_SIZE, regions[i].arena_memid, stats); } // and release - mi_atomic_write(&region->info,0); - mi_atomic_write(&region->map,0); + mi_atomic_write(&regions[i].info,0); + mi_atomic_write(&regions_dirty[i],0); + mi_atomic_write(&regions_map[i],0); } } } From b09282bc0d6e3228c556eac833331438dbe774be Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 6 Nov 2019 22:49:01 -0800 Subject: [PATCH 024/179] change arena allocator to atomic bitmap as well --- include/mimalloc.h | 4 +- src/arena.c | 268 +++++++++++++-------------------------------- src/bitmap.inc.c | 6 +- src/init.c | 4 +- src/os.c | 20 ++-- 5 files changed, 94 insertions(+), 208 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index c03ddc1e..70b6e412 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; diff --git a/src/arena.c b/src/arena.c index e58d2c47..b807cd47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,15 +7,19 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to allocate in one arena consisting of huge OS pages -- otherwise it delegates to direct allocation from the OS. In the future, we can expose an API to manually add more arenas which is sometimes needed for embedded devices or shared memory for example. -The arena allocation needs to be thread safe and we use a lock-free scan -with on-demand coalescing. +The arena allocation needs to be thread safe and we use an atomic +bitmap to allocate. 
The current implementation of the bitmap can +only do this within a field (`uintptr_t`) so we can allocate at most +blocks of 2GiB (64*32MiB) and no object can cross the boundary. This +can lead to fragmentation but fortunately most objects will be regions +of 256MiB in practice. -----------------------------------------------------------------------------*/ #include "mimalloc.h" #include "mimalloc-internal.h" @@ -23,6 +27,8 @@ with on-demand coalescing. #include // memset +#include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -36,9 +42,11 @@ int _mi_os_numa_node_count(void); Arena allocation ----------------------------------------------------------- */ -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 16MiB -#define MI_MAX_ARENAS (64) +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_ARENA_BLOCK_SIZE (8*MI_SEGMENT_ALIGN) // 32MiB +#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB +#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. @@ -48,11 +56,13 @@ typedef uintptr_t mi_block_info_t; typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) + size_t field_count; // number of bitmap fields int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated - _Atomic(uintptr_t) block_bottom; // optimization to start the search for free blocks - _Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's + volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -69,180 +79,55 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. 
#define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, size_t block_index) { +static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); - return ((block_index << 8) | ((arena_index+1) & 0xFF)); + return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) { +static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; - *block_index = (memid >> 8); + *bitmap_index = (memid >> 8); } -/* ----------------------------------------------------------- - Block info ------------------------------------------------------------ */ -static bool mi_block_is_in_use(mi_block_info_t info) { - return ((info&1) != 0); +static size_t mi_arena_block_count_of_size(size_t size) { + const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); + const size_t bcount = asize / MI_ARENA_BLOCK_SIZE; + return bcount; } -static size_t mi_block_count(mi_block_info_t info) { - return (info>>1); -} - -static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) { - return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0)); -} - - /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ - -static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index) +static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) { - // Scan linearly through all block info's - // Skipping used ranges, coalescing free ranges on demand. - mi_assert_internal(needed_bcount > 0); - mi_assert_internal(start_idx <= arena->block_count); - mi_assert_internal(end_idx <= arena->block_count); - _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx]; - _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx]; - while (block < end) { - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - size_t bcount = mi_block_count(binfo); - if (mi_block_is_in_use(binfo)) { - // in-use, skip ahead - mi_assert_internal(bcount > 0); - block += bcount; - } - else { - // free blocks - if (bcount==0) { - // optimization: - // use 0 initialized blocks at the end, to use single atomic operation - // initially to reduce contention (as we don't need to split) - if (block + needed_bcount > end) { - return NULL; // does not fit - } - else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. 
- continue; - } - else { - // we got it: return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = arena->is_zero_init; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } - - mi_assert_internal(bcount>0); - if (needed_bcount > bcount) { -#if 0 // MI_NO_ARENA_COALESCE - block += bcount; // too small, skip to the next range - continue; -#else - // too small, try to coalesce - _Atomic(mi_block_info_t)* block_next = block + bcount; - if (block_next >= end) { - return NULL; // does not fit - } - mi_block_info_t binfo_next = mi_atomic_read(block_next); - size_t bcount_next = mi_block_count(binfo_next); - if (mi_block_is_in_use(binfo_next)) { - // next block is in use, cannot coalesce - block += (bcount + bcount_next); // skip ahea over both blocks - } - else { - // next block is free, try to coalesce - // first set the next one to being used to prevent dangling ranges - if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) { - // someone else got in before us.. try again - continue; - } - else { - if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance - // someone claimed/coalesced the block in the meantime - // first free the next block again.. - bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong - mi_assert(ok); UNUSED(ok); - // and try again - continue; - } - else { - // coalesced! try again - // todo: we could optimize here to immediately claim the block if the - // coalesced size is a fit instead of retrying. Keep it simple for now. - continue; - } - } - } -#endif - } - else { // needed_bcount <= bcount - mi_assert_internal(needed_bcount <= bcount); - // it fits, claim the whole block - if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) { - // ouch, someone else was quicker. Try again.. - continue; - } - else { - // got it, now split off the needed part - if (needed_bcount < bcount) { - mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false)); - mi_atomic_write(block, mi_block_info_create(needed_bcount, true)); - } - // return a pointer to the claimed memory - ptrdiff_t idx = (block - arena->blocks); - *is_zero = false; - *block_index = idx; - return (arena->start + (idx*MI_ARENA_BLOCK_SIZE)); - } - } + const size_t fcount = arena->field_count; + size_t idx = mi_atomic_read(&arena->search_idx); // start from last search + for (size_t visited = 0; visited < fcount; visited++, idx++) { + if (idx >= fcount) idx = 0; // wrap around + if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + // claimed it! set the dirty bits + *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx); + mi_atomic_write(&arena->search_idx, idx); // start search from here next time + return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE); } } - // no success return NULL; } -// Try to reduce search time by starting from bottom and wrap around. 
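The new mi_arena_alloc above replaces the old linear scan with a rotating first-fit over the bitmap fields: it resumes at search_idx, wraps around once, and on success remembers where it found room so the next allocation starts there. The same wrap-around pattern in isolation, as a minimal sketch (the helper name and callback are hypothetical, not part of the sources):

#include <stddef.h>
#include <stdint.h>   // SIZE_MAX
#include <stdbool.h>

static size_t my_rotating_search(size_t start_idx, size_t field_count, bool (*field_has_room)(size_t idx)) {
  size_t idx = start_idx;
  for (size_t visited = 0; visited < field_count; visited++, idx++) {
    if (idx >= field_count) idx = 0;      // wrap around to the first field
    if (field_has_room(idx)) return idx;  // first fit wins
  }
  return SIZE_MAX;                        // every field visited without success
}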
-static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index) -{ - uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom); - void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index); - if (p == NULL && bottom > 0) { - // try again from the start - p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index); - } - if (p != NULL) { - mi_atomic_write(&arena->block_bottom, *block_index); - } - return p; -} /* ----------------------------------------------------------- Arena Allocation ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, - size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid) { - size_t block_index = SIZE_MAX; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index); + mi_bitmap_index_t bitmap_index; + void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index); if (p != NULL) { - mi_assert_internal(block_index != SIZE_MAX); - #if MI_DEBUG>=1 - _Atomic(mi_block_info_t)* block = &arena->blocks[block_index]; - mi_block_info_t binfo = mi_atomic_read(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo) >= needed_bcount); - #endif - *memid = mi_memid_create(arena_index, block_index); + *memid = mi_memid_create(arena_index, bitmap_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; } @@ -261,15 +146,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (large==NULL) large = &default_large; // ensure `large != NULL` // try to allocate in an arena if the alignment is small enough - // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`. - if (alignment <= MI_SEGMENT_ALIGN && - size >= 3*(MI_ARENA_BLOCK_SIZE/4) && // > 12MiB (not more than 25% waste) - !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <16MiB - 24MiB> - ) + // and the object is not too large or too small. 
+ if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && + size >= MI_ARENA_MIN_OBJ_SIZE) { - size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - int numa_node = _mi_os_numa_node(tld); // current numa node + const size_t bcount = mi_arena_block_count_of_size(size); + const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation @@ -324,8 +207,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { else { // allocated in an arena size_t arena_idx; - size_t block_idx; - mi_memid_indices(memid, &arena_idx, &block_idx); + size_t bitmap_idx; + mi_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -333,27 +216,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - mi_assert_internal(arena->block_count > block_idx); - if (arena->block_count <= block_idx) { - _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); + if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { + _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx]; - mi_block_info_t binfo = mi_atomic_read_relaxed(block); - mi_assert_internal(mi_block_is_in_use(binfo)); - mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size); - if (!mi_block_is_in_use(binfo)) { + const size_t blocks = mi_arena_block_count_of_size(size); + bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; }; - bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo); - mi_assert_internal(ok); - if (!ok) { - _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo); - } - if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) { - mi_atomic_write(&arena->block_bottom, block_idx); - } } } @@ -365,8 +238,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t))); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -383,40 +255,49 @@ static bool mi_arena_add(mi_arena_t* arena) { #include // ENOMEM // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize); + void* p = 
_mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = hsize / MI_ARENA_BLOCK_SIZE; - size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much + size_t bcount = mi_arena_block_count_of_size(hsize); + size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } arena->block_count = bcount; - arena->start = (uint8_t*)p; - arena->block_bottom = 0; + arena->field_count = fields; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; - memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t)); + arena->search_idx = 0; + arena->blocks_dirty = &arena->blocks_map[bcount]; + size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + if (post > 0) { + // don't use leftover bits at the end + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx); + } + mi_arena_add(arena); return 0; } // reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node @@ -424,12 +305,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept { if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; + const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node); + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -446,7 +328,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages); + int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 5bea4748..aeb185d1 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -135,13 +135,15 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1 previously +static inline bool 
mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal((bitmap[idx] & mask) == mask); - mi_atomic_and(&bitmap[idx], ~mask); + uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + return ((prev & mask) == mask); } diff --git a/src/init.c b/src/init.c index ef848de4..f6d253f9 100644 --- a/src/init.c +++ b/src/init.c @@ -433,8 +433,8 @@ static void mi_process_load(void) { } if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages); + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); } } diff --git a/src/os.c b/src/os.c index 254f85f1..027df6ab 100644 --- a/src/os.c +++ b/src/os.c @@ -940,16 +940,18 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout - mi_msecs_t elapsed = _mi_clock_end(start_t); - if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break - elapsed = max_msecs + 1; + if (max_msecs > 0) { + mi_msecs_t elapsed = _mi_clock_end(start_t); + if (page >= 1) { + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + elapsed = max_msecs + 1; + } + } + if (elapsed > max_msecs) { + _mi_warning_message("huge page allocation timed out\n"); + break; } - } - if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); - break; } } mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); From 378716c46724d839411166a0bba68b0722cf9d8b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:26:52 -0800 Subject: [PATCH 025/179] refactor and improve atomic bitmap usage --- CMakeLists.txt | 12 ++- ide/vs2019/mimalloc-override.vcxproj | 3 + ide/vs2019/mimalloc.vcxproj | 4 +- include/mimalloc-internal.h | 11 ++- include/mimalloc-types.h | 10 +-- src/arena.c | 62 +++++++-------- src/bitmap.inc.c | 110 ++++++++++++++++++--------- src/memory.c | 96 +++++++++++------------ src/page.c | 2 + test/test-stress.c | 4 +- 10 files changed, 183 insertions(+), 131 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12540f68..0726c601 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_SECURE_FULL "Use full security mitigations (like double free protection, more expensive)" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) @@ -70,9 +71,14 @@ if(MI_OVERRIDE MATCHES "ON") endif() endif() -if(MI_SECURE MATCHES "ON") - message(STATUS "Set secure build (MI_SECURE=ON)") - list(APPEND mi_defines MI_SECURE=3) +if(MI_SECURE_FULL MATCHES "ON") + message(STATUS "Set full secure build (experimental) 
(MI_SECURE_FULL=ON)") + list(APPEND mi_defines MI_SECURE=4) +else() + if(MI_SECURE MATCHES "ON") + message(STATUS "Set secure build (MI_SECURE=ON)") + list(APPEND mi_defines MI_SECURE=3) + endif() endif() if(MI_SEE_ASM MATCHES "ON") diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index e1c7535c..49f3d213 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -232,6 +232,9 @@ + + true + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 19696c10..bae49bab 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -218,7 +218,9 @@ - + + true + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 413f76e6..4d8b6a77 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -163,7 +163,6 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply -#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX @@ -175,6 +174,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { return __builtin_umulll_overflow(count, size, total); #endif #else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); @@ -188,6 +188,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) { // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); uintptr_t mask = alignment - 1; if ((alignment & mask) == 0) { // power of two? return ((sz + mask) & ~mask); @@ -197,6 +198,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { + mi_assert_internal(divider != 0); + return (divider == 0 ? size : ((size + divider - 1) / divider)); +} + // Is memory zero initialized? static inline bool mi_mem_is_zero(void* p, size_t size) { for (size_t i = 0; i < size; i++) { @@ -283,7 +290,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 99b6b22b..ced8e7a9 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode @@ -93,12 +93,12 @@ terms of the MIT license. 
A copy of the license can be found in the file #define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb // Derived constants -#define MI_SEGMENT_SIZE (1<= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to -allocate in one arena consisting of huge OS pages -- otherwise it -delegates to direct allocation from the OS. +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between +threads and need to be accessed using atomic operations. -In the future, we can expose an API to manually add more arenas which -is sometimes needed for embedded devices or shared memory for example. +Currently arenas are only used to for huge OS page (1GiB) reservations, +otherwise it delegates to direct allocation from the OS. +In the future, we can expose an API to manually add more kinds of arenas +which is sometimes needed for embedded devices or shared memory for example. +(We can also employ this with WASI or `sbrk` systems to reserve large arenas + on demand and be able to reuse them efficiently). The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. The current implementation of the bitmap can @@ -48,10 +52,6 @@ int _mi_os_numa_node_count(void); #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) -// Block info: bit 0 contains the `in_use` bit, the upper bits the -// size in count of arena blocks. -typedef uintptr_t mi_block_info_t; - // A memory arena descriptor typedef struct mi_arena_s { uint8_t* start; // the start of the memory area @@ -61,8 +61,8 @@ typedef struct mi_arena_s { bool is_zero_init; // is the arena zero initialized? bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks } mi_arena_t; @@ -81,6 +81,7 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); + mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } @@ -90,30 +91,25 @@ static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_ *bitmap_index = (memid >> 8); } - -static size_t mi_arena_block_count_of_size(size_t size) { - const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); - const size_t bcount = asize / MI_ARENA_BLOCK_SIZE; - return bcount; +static size_t mi_block_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { - // claimed it! set the dirty bits - *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx); mi_atomic_write(&arena->search_idx, idx); // start search from here next time - return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE); + return true; } } - return NULL; + return false; } @@ -125,13 +121,15 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n bool* commit, bool* large, bool* is_zero, size_t* memid) { mi_bitmap_index_t bitmap_index; - void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index); - if (p != NULL) { - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; + if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { + // claimed it! set the dirty bits (todo: no need for an atomic op here?) + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index); + *memid = mi_memid_create(arena_index, bitmap_index); + *commit = true; // TODO: support commit on demand? 
+ *large = arena->is_large; + return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); } - return p; + return NULL; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, @@ -140,7 +138,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` @@ -151,7 +149,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { - const size_t bcount = mi_arena_block_count_of_size(size); + const size_t bcount = mi_block_count_of_size(size); const int numa_node = _mi_os_numa_node(tld); // current numa node mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); @@ -221,7 +219,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - const size_t blocks = mi_arena_block_count_of_size(size); + const size_t blocks = mi_block_count_of_size(size); bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); @@ -268,7 +266,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - size_t bcount = mi_arena_block_count_of_size(hsize); + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? @@ -284,6 +282,8 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_map[bcount]; + // the bitmaps are already zero initialized due to os_alloc + // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index aeb185d1..19e6bbb8 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -1,41 +1,30 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This file is meant to be included in other files for efficiency. +It implements a bitmap that can set/reset sequences of bits atomically +and is used to concurrently claim memory ranges. + +A bitmap is an array of fields where each field is a machine word (`uintptr_t`) + +A current limitation is that the bit sequences cannot cross fields +and that the sequence must be smaller or equal to the bits in a field. 
+---------------------------------------------------------------------------- */ #pragma once -#ifndef MI_BITMAP_H -#define MI_BITMAP_H +#ifndef MI_BITMAP_C +#define MI_BITMAP_C #include "mimalloc.h" #include "mimalloc-internal.h" -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanForward)(&idx, x); - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanReverse)(&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -#if (INTPTR_MAX == LONG_MAX) -# define MI_L(x) x##l -#else -# define MI_L(x) x##ll -#endif -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); -} -#endif - +/* ----------------------------------------------------------- + Bitmap definition +----------------------------------------------------------- */ #define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) #define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set @@ -63,14 +52,59 @@ static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) return (bitmap_idx % MI_BITMAP_FIELD_BITS); } +// Get the full bit index +static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { + return bitmap_idx; +} + + // The bit mask for a given number of blocks at a specified bit index. static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); return ((((uintptr_t)1 << count) - 1) << bitidx); } -// Try to atomically claim a sequence of `count` bits in a single field at `idx` in `bitmap`. -// Returns `true` on success. + +/* ----------------------------------------------------------- + Use bit scan forward/reverse to quickly find the first zero bit if it is available +----------------------------------------------------------- */ +#if defined(_MSC_VER) +#define MI_HAVE_BITSCAN +#include +static inline size_t mi_bsf(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanForward)(&idx, x); + return idx; +} +static inline size_t mi_bsr(uintptr_t x) { + if (x==0) return 8*MI_INTPTR_SIZE; + DWORD idx; + MI_64(_BitScanReverse)(&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +#include // LONG_MAX +#define MI_HAVE_BITSCAN +#if (INTPTR_MAX == LONG_MAX) +# define MI_L(x) x##l +#else +# define MI_L(x) x##ll +#endif +static inline size_t mi_bsf(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); +} +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); +} +#endif + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. 
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -93,7 +127,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con while (bitidx <= bitidx_max) { if ((map & m) == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; + const uintptr_t newmap = map | m; mi_assert_internal((newmap^map) >> bitidx == mask); if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? // no success, another thread claimed concurrently.. keep going @@ -109,10 +143,10 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con else { // on to the next bit range #ifdef MI_HAVE_BITSCAN - size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); mi_assert_internal(shift > 0 && shift <= count); #else - size_t shift = 1; + const size_t shift = 1; #endif bitidx += shift; m <<= shift; diff --git a/src/memory.c b/src/memory.c index 29e0e412..bdbf1e48 100644 --- a/src/memory.c +++ b/src/memory.c @@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of: 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order to reuse memory effectively. 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses in that - object :-( (`malloc-large` tests this). This means we need a cheaper way to - reuse memory. -3. This layer can help with a NUMA aware allocation in the future. + an OS allocation/free is still (much) too expensive relative to the accesses + in that object :-( (`malloc-large` tests this). This means we need a cheaper + way to reuse memory. +3. This layer allows for NUMA aware allocation. 
Possible issues: - (2) can potentially be addressed too with a small cache per thread which is much @@ -47,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -//void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); @@ -58,18 +56,18 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 16KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map #elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // 196 bytes for the region map +#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else #error "define the maximum heap space allowed for regions on this platform" #endif #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB +#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) #define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) +#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) // Region info is a pointer to the memory region and two bits for @@ -95,7 +93,7 @@ typedef struct mem_region_s { size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; -// The region map; 16KiB for a 256GiB HEAP_REGION_MAX +// The region map static mem_region_t regions[MI_REGION_MAX]; // A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. @@ -173,7 +171,7 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i bool region_large = allow_large; bool is_zero = false; size_t arena_memid = 0; - void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); mi_assert_internal(!(region_large && !allow_large)); if (start == NULL) { @@ -183,35 +181,31 @@ static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_i } // set the newly allocated region + // try to initialize any region up to 4 beyond the current one in + // care multiple threads are doing this concurrently (common at startup) info = mi_region_info_create(start, region_large, region_commit); - if (mi_atomic_cas_strong(®ions[idx].info, info, 0)) { - // update the region count - regions[idx].arena_memid = arena_memid; - mi_atomic_write(®ions[idx].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_write(®ions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); - mi_atomic_increment(®ions_count); - } - else { - // failed, another thread allocated just before us! - // we assign it to a later slot instead (up to 4 tries). 
- for (size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { - regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_write(®ions_dirty[idx], is_zero ? 0 : ~((uintptr_t)0)); - mi_atomic_increment(®ions_count); - start = NULL; - break; - } + bool claimed = false; + for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) { + if (!is_zero) { + // set dirty bits before CAS; this might race with a zero block but that is ok. + // (but writing before cas prevents a concurrent allocation to assume it is not dirty) + mi_atomic_write(®ions_dirty[idx+i], MI_BITMAP_FIELD_FULL); } - if (start != NULL) { - // free it if we didn't succeed to save it to some other region - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); - // _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); + if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { + // claimed! + regions[idx+i].arena_memid = arena_memid; + mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); + mi_atomic_increment(®ions_count); + claimed = true; } - // and continue with the memory at our index - info = mi_atomic_read(®ions[idx].info); } + if (!claimed) { + // free our OS allocation if we didn't succeed to store it in some region + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + } + // continue with the actual info at our index in case another thread was quicker with the allocation + info = mi_atomic_read(®ions[idx].info); + mi_assert_internal(info != 0); } mi_assert_internal(info == mi_atomic_read(®ions[idx].info)); mi_assert_internal(info != 0); @@ -290,19 +284,21 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; if (rnode != numa_node) return false; } - if (mi_unlikely(!(commit || allow_large))) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? - if (!ok) return false; - } - return true; + if (commit && allow_large) return true; // always ok + + // otherwise skip incompatible regions if possible. + // this is not guaranteed due to multiple threads allocating at the same time but + // that's ok. In secure mode, large is never allowed for any thread, so that works out; + // otherwise we might just not be able to reset/decommit individual pages sometimes. + mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); + bool is_large; + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); + // note: we also skip if commit is false and the region is committed, + // that is a bit strong but prevents allocation of eager delayed segments in + // committed memory + bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? 
+ return ok; } // Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. diff --git a/src/page.c b/src/page.c index 32b68edb..c5b6e370 100644 --- a/src/page.c +++ b/src/page.c @@ -497,8 +497,10 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); + #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); + #endif mi_assert_internal(page->capacity + extend <= page->reserved); void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); size_t bsize = page->block_size; diff --git a/test/test-stress.c b/test/test-stress.c index bb428072..d80cb1a4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -66,7 +66,9 @@ static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) items *= 100; // 1% huge objects; if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; } From 27f1a8b3d24acf0ff0bcbdacfbecd21437fb450e Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 10:35:30 -0800 Subject: [PATCH 026/179] fix avg display; set secure default to 0` --- include/mimalloc-types.h | 2 +- src/stats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ddbe72f3..3f5e4e27 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode diff --git a/src/stats.c b/src/stats.c index 011fab64..cb6d8866 100644 --- a/src/stats.c +++ b/src/stats.c @@ -206,7 +206,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg %ld %ld\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } From 13f5e6e43e9aae4043d9acc94fac67746fcd9bb4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Nov 2019 18:09:30 -0800 Subject: [PATCH 027/179] fix numa node check in regions --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index bdbf1e48..fb3f5093 100644 --- a/src/memory.c +++ b/src/memory.c @@ -282,7 +282,7 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a if (m == MI_BITMAP_FIELD_FULL) return false; if (numa_node >= 0) { // use negative numa node to always succeed int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; - if (rnode != numa_node) return false; + if (rnode >= 0 && rnode != numa_node) return false; } if (commit && allow_large) return true; // always ok From 7b72a4cd50782563104e28becb7e181e8978449f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Nov 2019 11:55:43 -0800 Subject: [PATCH 028/179] fix region suitable bug --- src/memory.c | 6 +++--- test/test-stress.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/memory.c b/src/memory.c index bdbf1e48..f8798d99 100644 --- a/src/memory.c +++ b/src/memory.c @@ -281,8 +281,8 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a uintptr_t m = mi_atomic_read_relaxed(®ions_map[idx]); if (m == MI_BITMAP_FIELD_FULL) return false; if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ions->numa_node)) - 1; - if (rnode != numa_node) return false; + int rnode = ((int)mi_atomic_read_relaxed(®ions[idx].numa_node)) - 1; + if (rnode >= 0 && rnode != numa_node) return false; } if (commit && allow_large) return true; // always ok @@ -290,7 +290,7 @@ static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool a // this is not guaranteed due to multiple threads allocating at the same time but // that's ok. In secure mode, large is never allowed for any thread, so that works out; // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ions->info); + mi_region_info_t info = mi_atomic_read_relaxed(®ions[idx].info); bool is_large; bool is_committed; void* start = mi_region_info_read(info, &is_large, &is_committed); diff --git a/test/test-stress.c b/test/test-stress.c index d80cb1a4..be2a9c67 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,7 @@ terms of the MIT license. 
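// [editor's note] Context for the two memory.c fixes above, not part of any
// patch. The region's `numa_node` field stores the node plus one, so a value
// of 0 means "no association yet" and reads back as -1:
//
//   int rnode = (int)mi_atomic_read_relaxed(&regions[idx].numa_node) - 1;  // 0 stored => rnode == -1
//   if (rnode >= 0 && rnode != numa_node) return false;  // only reject a *known* mismatch
//
// The second fix matters because `regions` is an array: `regions->numa_node`
// is equivalent to `regions[0].numa_node`, so the suitability check was always
// inspecting region 0 rather than `regions[idx]`.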
// argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 40; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor From 9f08ddd0d0d2909998d71bf6da9bce2b048d851e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Nov 2019 19:30:53 -0800 Subject: [PATCH 029/179] refactor regions; add commit tracking on a segment basis --- src/arena.c | 9 +- src/bitmap.inc.c | 14 +- src/memory.c | 382 ++++++++++++++++++++--------------------------- src/segment.c | 2 +- 4 files changed, 181 insertions(+), 226 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8feec89f..1b6cf4a4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -123,7 +123,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n mi_bitmap_index_t bitmap_index; if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { // claimed it! set the dirty bits (todo: no need for an atomic op here?) - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *memid = mi_memid_create(arena_index, bitmap_index); *commit = true; // TODO: support commit on demand? *large = arena->is_large; @@ -181,7 +181,10 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; - *memid = MI_MEMID_OS; + *memid = MI_MEMID_OS; + if (*large) { + *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed + } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -288,7 +291,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx); + mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); } mi_arena_add(arena); diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 19e6bbb8..3847e712 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -61,6 +61,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. 
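// [editor's note] A brief aside, not part of the patch, on the
// MI_BITMAP_FIELD_BITS special case added in the hunk below: shifting by the
// full width of the type is undefined behaviour in C, so on a 64-bit field
//
//   ((uintptr_t)1 << 64) - 1   // UB: shift count equals the width of uintptr_t
//
// cannot be used to build an all-ones mask; it has to be returned directly as
// MI_BITMAP_FIELD_FULL. The region refactor below may ask for up to
// MI_BITMAP_FIELD_BITS blocks in one call (memory.c asserts
// `blocks <= MI_BITMAP_FIELD_BITS`), which is presumably why the guard is
// needed now.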
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); } @@ -183,14 +184,25 @@ static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, s // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously -static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } +// Returns `true` if all `count` bits were 1 +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == 0); + return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); +} + #endif \ No newline at end of file diff --git a/src/memory.c b/src/memory.c index f8798d99..a1f94e18 100644 --- a/src/memory.c +++ b/src/memory.c @@ -65,10 +65,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE +#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX_ALLOC_SIZE (MI_REGION_SIZE/4) // 64MiB #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) - +#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB +#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. @@ -88,20 +89,16 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - size_t arena_memid; // if allocated from a (huge page) arena + volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) + volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + mi_bitmap_field_t in_use; + mi_bitmap_field_t dirty; + size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; // The region map static mem_region_t regions[MI_REGION_MAX]; -// A bit mask per region for its claimed MI_SEGMENT_SIZE blocks. 
-static mi_bitmap_field_t regions_map[MI_REGION_MAX]; - -// A bit mask per region to track which blocks are dirty (= potentially written to) -static mi_bitmap_field_t regions_dirty[MI_REGION_MAX]; - // Allocated regions static volatile _Atomic(uintptr_t) regions_count; // = 0; @@ -112,8 +109,7 @@ Utility functions // Blocks (of 4MiB) needed for the given size. static size_t mi_region_block_count(size_t size) { - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); - return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; + return _mi_divide_up(size, MI_SEGMENT_SIZE); } // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. @@ -134,8 +130,11 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { } -static size_t mi_memid_create(mi_bitmap_index_t bitmap_idx) { - return bitmap_idx<<1; +static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { + mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); + size_t idx = region - regions; + mi_assert_internal(®ions[idx] == region); + return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; } static size_t mi_memid_create_from_arena(size_t arena_memid) { @@ -146,177 +145,149 @@ static bool mi_memid_is_arena(size_t id) { return ((id&1)==1); } -static bool mi_memid_indices(size_t id, mi_bitmap_index_t* bitmap_idx, size_t* arena_memid) { +static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { if (mi_memid_is_arena(id)) { *arena_memid = (id>>1); return true; } else { - *bitmap_idx = (mi_bitmap_index_t)(id>>1); + size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; + *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; + *region = ®ions[idx]; return false; } } /* ---------------------------------------------------------------------------- - Ensure a region is allocated from the OS (or an arena) + Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_ensure_allocated(size_t idx, bool allow_large, mi_region_info_t* pinfo, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ions[idx].info); - if (mi_unlikely(info == 0)) - { - bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = allow_large; - bool is_zero = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); - mi_assert_internal(!(region_large && !allow_large)); + // not out of regions yet? + if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - if (start == NULL) { - // failure to allocate from the OS! fail - *pinfo = 0; - return false; - } - - // set the newly allocated region - // try to initialize any region up to 4 beyond the current one in - // care multiple threads are doing this concurrently (common at startup) - info = mi_region_info_create(start, region_large, region_commit); - bool claimed = false; - for (size_t i = 0; i <= 4 && idx + i < MI_REGION_MAX && !claimed; i++) { - if (!is_zero) { - // set dirty bits before CAS; this might race with a zero block but that is ok. 
- // (but writing before cas prevents a concurrent allocation to assume it is not dirty) - mi_atomic_write(®ions_dirty[idx+i], MI_BITMAP_FIELD_FULL); - } - if (mi_atomic_cas_strong(®ions[idx+i].info, info, 0)) { - // claimed! - regions[idx+i].arena_memid = arena_memid; - mi_atomic_write(®ions[idx+i].numa_node, _mi_os_numa_node(tld) + 1); - mi_atomic_increment(®ions_count); - claimed = true; - } - } - if (!claimed) { - // free our OS allocation if we didn't succeed to store it in some region - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); - } - // continue with the actual info at our index in case another thread was quicker with the allocation - info = mi_atomic_read(®ions[idx].info); - mi_assert_internal(info != 0); + // try to allocate a fresh region from the OS + bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); + bool region_large = (commit && allow_large); + bool is_zero = false; + size_t arena_memid = 0; + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); + if (start == NULL) return false; + mi_assert_internal(!(region_large && !allow_large)); + + // claim a fresh slot + const uintptr_t idx = mi_atomic_increment(®ions_count); + if (idx >= MI_REGION_MAX) { + mi_atomic_decrement(®ions_count); + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + return false; } - mi_assert_internal(info == mi_atomic_read(®ions[idx].info)); - mi_assert_internal(info != 0); - *pinfo = info; + + // allocated, initialize and claim the initial blocks + mem_region_t* r = ®ions[idx]; + r->numa_node = _mi_os_numa_node(tld) + 1; + r->arena_memid = arena_memid; + *bit_idx = 0; + mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + *region = r; + return true; +} + +/* ---------------------------------------------------------------------------- + Try to claim blocks in suitable regions +-----------------------------------------------------------------------------*/ + +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) { + // initialized at all? 
+ mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); + if (info==0) return false; + + // numa correct + if (numa_node >= 0) { // use negative numa node to always succeed + int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + if (rnode >= 0 && rnode != numa_node) return false; + } + + // note: we also skip if commit is false and the region is committed, + // that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region + bool is_large; + bool is_committed; + mi_region_info_read(info, &is_large, &is_committed); + + if (!commit && is_committed) return false; + if (!allow_large && is_large) return false; return true; } -/* ---------------------------------------------------------------------------- - Commit blocks ------------------------------------------------------------------------------*/ - -static void* mi_region_commit_blocks(mi_bitmap_index_t bitmap_idx, mi_region_info_t info, size_t blocks, size_t size, bool* commit, bool* is_large, bool* is_zero, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // set dirty bits - *is_zero = mi_bitmap_claim(regions_dirty, MI_REGION_MAX, blocks, bitmap_idx); + // try all regions for a free slot + const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + const size_t count = mi_atomic_read(®ions_count); + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? + for (size_t visited = 0; visited < count; visited++, idx++) { + if (idx >= count) idx = 0; // wrap around + mem_region_t* r = ®ions[idx]; + if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + tld->region_idx = idx; // remember the last found position + *region = r; + return true; + } + } + } + return false; +} - // Commit the blocks to memory + +static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); + mem_region_t* region; + mi_bitmap_index_t bit_idx; + // first try to claim in existing regions + if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + // otherwise try to allocate a fresh region + if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + // out of regions or memory + return NULL; + } + } + + // found a region and claimed `blocks` at `bit_idx` + mi_assert_internal(region != NULL); + mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); + + mi_region_info_t info = mi_atomic_read(®ion->info); bool region_is_committed = false; bool region_is_large = false; void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); mi_assert_internal(!(region_is_large && !*is_large)); - mi_assert_internal(start!=NULL); + mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bitmap_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed) { - // ensure commit - bool commit_zero = false; - _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) - if (commit_zero) *is_zero = true; - } - else if (!*commit && region_is_committed) { - // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) - *commit = true; - } 
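// [editor's note] A minimal sketch, not part of the patch, of the publication
// pattern used above: mi_region_try_alloc_os fills in the slot's fields first
// and writes `info` last ("now make it available to others"), while
// mi_region_is_suitable treats `info == 0` as "still being initialized" and
// skips the slot. With C11 atomics (the orderings of the mi_atomic_* wrappers
// are not shown in this patch, so release/acquire here is illustrative only):
#include <stdatomic.h>
#include <stdint.h>

typedef struct { _Atomic(uintptr_t) info; int numa_node; } slot_t;

static void slot_publish(slot_t* s, uintptr_t info, int node) {
  s->numa_node = node;                                           // plain fields first
  atomic_store_explicit(&s->info, info, memory_order_release);   // publish last
}

static int slot_observe(slot_t* s, uintptr_t* out_info) {
  uintptr_t v = atomic_load_explicit(&s->info, memory_order_acquire);
  if (v == 0) return 0;     // not initialized yet: caller skips this slot
  *out_info = v;            // fields written before the release store are visible now
  return 1;
}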
- - // and return the allocation - mi_assert_internal(blocks_start != NULL); + bool any_zero = false; + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, &any_zero); + if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed *is_large = region_is_large; - return blocks_start; + *memid = mi_memid_create(region, bit_idx); + void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); + if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed? + // ensure commit + _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + } + else { + *commit = region_is_committed || !any_zero; + } + + + // and return the allocation + mi_assert_internal(p != NULL); + return p; } -/* ---------------------------------------------------------------------------- - Claim and allocate blocks in a region ------------------------------------------------------------------------------*/ - -static bool mi_region_alloc_blocks( - size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_bitmap_index_t bitmap_idx; - if (!mi_bitmap_try_claim_field(regions_map, idx, blocks, &bitmap_idx)) { - return true; // no error, but also no success - } - mi_region_info_t info; - if (!mi_region_ensure_allocated(idx,*allow_large,&info,tld)) { - // failed to allocate region memory, unclaim the bits and fail - mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); - return false; - } - *p = mi_region_commit_blocks(bitmap_idx,info,blocks,size,commit,allow_large,is_zero,tld); - *id = mi_memid_create(bitmap_idx); - return true; -} - - -/* ---------------------------------------------------------------------------- - Try to allocate blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(int numa_node, size_t idx, bool commit, bool allow_large ) { - uintptr_t m = mi_atomic_read_relaxed(®ions_map[idx]); - if (m == MI_BITMAP_FIELD_FULL) return false; - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ions[idx].numa_node)) - 1; - if (rnode >= 0 && rnode != numa_node) return false; - } - if (commit && allow_large) return true; // always ok - - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ions[idx].info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of eager delayed segments in - // committed memory - bool ok = (start == NULL || (commit || !is_committed) || (allow_large || !is_large)); // Todo: test with one bitmap operation? - return ok; -} - -// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. 
-// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks( - int numa_node, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - // check if there are available blocks in the region.. - mi_assert_internal(idx < MI_REGION_MAX); - if (mi_region_is_suitable(numa_node, idx, *commit, *allow_large)) { - return mi_region_alloc_blocks(idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } - return true; // no error, but no success either -} /* ---------------------------------------------------------------------------- Allocation @@ -324,63 +295,35 @@ static bool mi_region_try_alloc_blocks( // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, - size_t* id, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(id != NULL && tld != NULL); + mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *id = 0; + *memid = 0; *is_zero = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` - - // use direct OS allocation for huge blocks or alignment - if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - size_t arena_memid = 0; - void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size - *id = mi_memid_create_from_arena(arena_memid); - return p; - } - - // always round size to OS page size multiple (so commit/decommit go over the entire range) - // TODO: use large OS page size here? + if (size == 0) return NULL; size = _mi_align_up(size, _mi_os_page_size()); - // calculate the number of needed blocks + // allocate from regions if possible + size_t arena_memid; const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); - - // find a range of free blocks - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - void* p = NULL; - const size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - - if (p == NULL) { - // no free range in existing regions -- try to extend beyond the count.. 
but at most 8 regions - for (idx = count; idx < mi_atomic_read_relaxed(®ions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; + if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { + void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); + mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); + if (p != NULL) { + if (*commit) { ((uint8_t*)p)[0] = 0; } + return p; } + _mi_warning_message("unable to allocate from region: size %zu\n", size); } - if (p == NULL) { - // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - size_t arena_memid = 0; - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); - *id = mi_memid_create_from_arena(arena_memid); - } - else { - tld->region_idx = idx; // next start of search - } - + // and otherwise fall back to the OS + void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *memid = mi_memid_create_from_arena(arena_memid); mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); + if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; } return p; } @@ -396,31 +339,28 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { if (p==NULL) return; if (size==0) return; size_t arena_memid = 0; - mi_bitmap_index_t bitmap_idx; - if (mi_memid_indices(id,&bitmap_idx,&arena_memid)) { + mi_bitmap_index_t bit_idx; + mem_region_t* region; + if (mi_memid_indices(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, stats); } else { // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; + mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? - mem_region_t* region = ®ions[idx]; + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; bool is_eager_committed; void* start = mi_region_info_read(info,&is_large,&is_eager_committed); mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); + void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bitidx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? + mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); + if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? // decommit (or reset) the blocks to reduce the working set. 
// TODO: implement delayed decommit/reset as these calls are too expensive @@ -446,7 +386,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // this frees up virtual address space which might be useful on 32-bit systems? // and unclaim - mi_bitmap_unclaim(regions_map, MI_REGION_MAX, blocks, bitmap_idx); + mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); } } @@ -456,13 +396,15 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_stats_t* stats) { // free every region that has no segments in use. - for (size_t i = 0; i < regions_count; i++) { - if (mi_atomic_read_relaxed(®ions_map[i]) == 0) { + uintptr_t rcount = mi_atomic_read_relaxed(®ions_count); + for (size_t i = 0; i < rcount; i++) { + mem_region_t* region = ®ions[i]; + if (mi_atomic_read_relaxed(®ion->info) != 0) { // if no segments used, try to claim the whole region uintptr_t m; do { - m = mi_atomic_read_relaxed(®ions_map[i]); - } while(m == 0 && !mi_atomic_cas_weak(®ions_map[i], MI_BITMAP_FIELD_FULL, 0 )); + m = mi_atomic_read_relaxed(®ion->in_use); + } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region bool is_eager_committed; @@ -471,9 +413,7 @@ void _mi_mem_collect(mi_stats_t* stats) { _mi_arena_free(start, MI_REGION_SIZE, regions[i].arena_memid, stats); } // and release - mi_atomic_write(®ions[i].info,0); - mi_atomic_write(®ions_dirty[i],0); - mi_atomic_write(®ions_map[i],0); + mi_atomic_write(®ion->info,0); } } } diff --git a/src/segment.c b/src/segment.c index 178e0eda..b2b37fac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -370,7 +370,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); From d2279b2a3faf7c2e084644449326306ef8d4f619 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Nov 2019 08:13:40 -0800 Subject: [PATCH 030/179] update test-stress with better object distribution --- test/test-stress.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index be2a9c67..37572d42 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -17,8 +17,8 @@ terms of the MIT license. 
#include // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 40; // scaling factor +static int THREADS = 8; // more repeatable if THREADS <= #processors +static int N = 200; // scaling factor // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -63,7 +63,11 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r)) items *= 1000; // 0.01% giant + else if (chance(10, r)) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); if (p != NULL) { From 21bbb1be870c8b9bd6ca057257a4cbb0ec57e6e5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 10 Nov 2019 12:36:55 -0800 Subject: [PATCH 031/179] fix warnings --- src/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index 7e704e7a..d5ec03c2 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, tld_main_stats, {{0,NULL,0}} }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { From 83a066fd2d0d7484abf6372e41ac777c721c761a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 09:46:02 -0800 Subject: [PATCH 032/179] remove reset_decommits option --- include/mimalloc.h | 3 +-- src/memory.c | 28 ++++++++++++---------------- src/options.c | 7 +++---- src/os.c | 20 +++----------------- 4 files changed, 19 insertions(+), 39 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 70b6e412..4c542ee0 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,9 +272,8 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_cache_reset, - mi_option_reset_decommits, - mi_option_eager_commit_delay, mi_option_segment_reset, + mi_option_eager_commit_delay, mi_option_os_tag, mi_option_max_numa_node, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index a1f94e18..ceb9a702 100644 --- a/src/memory.c +++ b/src/memory.c @@ -350,12 +350,12 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; // we can align the size up to page size (as we allocate that way too) // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - const size_t blocks = mi_region_block_count(size); + size = _mi_align_up(size, _mi_os_page_size()); + const size_t blocks = mi_region_block_count(size); mi_region_info_t info = mi_atomic_read(®ion->info); bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); + bool is_committed; + void* start = mi_region_info_read(info, &is_large, &is_committed); mi_assert_internal(start != NULL); void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); mi_assert_internal(blocks_start == p); // not a pointer in our area? 
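// [editor's note] Worked example of the memid decoding used by the free path
// above (see mi_memid_create/mi_memid_indices earlier in this series), assuming
// a 64-bit build where MI_BITMAP_FIELD_BITS == 64 and MI_SEGMENT_SIZE == 4MiB:
//
//   region idx 3, bit idx 5:   memid = (3*64 + 5) << 1 = 394    (low bit 0 = region id)
//   decode:  394 >> 1 = 197;   197 / 64 = 3 (region);  197 % 64 = 5 (bit)
//   blocks_start = region start + 5 * MI_SEGMENT_SIZE = start + 20MiB
//
// Arena allocations keep the low bit set to 1 instead, and mi_memid_indices
// recovers the arena memid with `id >> 1`.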
@@ -366,18 +366,14 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { // TODO: implement delayed decommit/reset as these calls are too expensive // if the memory is reused soon. // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? - } - } - } - if (!is_eager_committed) { + if (!is_large && + mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead + { + _mi_os_reset(p, size, stats); + //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? + } + if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); } diff --git a/src/options.c b/src/options.c index 63b1612a..75a2736a 100644 --- a/src/options.c +++ b/src/options.c @@ -65,11 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, + { 1, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output @@ -88,7 +87,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } diff --git a/src/os.c b/src/os.c index 027df6ab..5229381b 100644 --- a/src/os.c +++ b/src/os.c @@ -646,10 +646,6 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); -} - // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -708,22 +704,12 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. 
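// [editor's note] mi_os_resetx itself is outside this hunk; purely as an
// illustration, a "reset" in this sense is typically implemented with an
// madvise hint (Linux/BSD/macOS) or MEM_RESET via VirtualAlloc on Windows:
// the range stays committed, but the OS may reclaim the physical pages and the
// next touch may observe either the old contents or zero-filled pages.
#include <stddef.h>
#include <sys/mman.h>

static int os_reset_hint(void* addr, size_t size) {
#if defined(MADV_FREE)
  if (madvise(addr, size, MADV_FREE) == 0) return 0;   // lazily reclaimable, cheap
#endif
  return madvise(addr, size, MADV_DONTNEED);           // stronger fallback hint
}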
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr,size,stats); - } - else { - return mi_os_resetx(addr, size, true, stats); - } + return mi_os_resetx(addr, size, true, stats); } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) - } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } From 93a646338343984b86b00b1c7852322eafa7190e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:16:45 -0800 Subject: [PATCH 033/179] only allow commit delay for small and medium objects --- src/options.c | 2 +- src/segment.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/options.c b/src/options.c index 75a2736a..dbb7df79 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed diff --git a/src/segment.c b/src/segment.c index b2b37fac..d089078c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -328,9 +328,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); + bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); @@ -359,7 +359,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { From 534e1e39ef29946e502fd0f668d2dc80ffd141da Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:42:29 -0800 Subject: [PATCH 034/179] allow allocation in committed regions even if not requested --- src/memory.c | 6 ++---- src/options.c | 4 ++-- src/segment.c | 4 +++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/memory.c b/src/memory.c index ceb9a702..24239e05 100644 --- a/src/memory.c +++ b/src/memory.c @@ -210,14 +210,12 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo if (rnode >= 0 && rnode != numa_node) return false; } - // note: we also skip if commit is false and the region is committed, - // that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region + // check allow-large bool is_large; bool is_committed; mi_region_info_read(info, &is_large, &is_committed); - - if (!commit && is_committed) return false; if (!allow_large && is_large) return false; + return true; } diff --git a/src/options.c b/src/options.c index dbb7df79..694b916b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,8 +65,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, - { 0, UNINIT, MI_OPTION(cache_reset) }, + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index d089078c..eb5a0390 100644 --- a/src/segment.c +++ b/src/segment.c @@ -327,12 +327,14 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment_size >= required); size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); - // Try to get it from our thread local cache first + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); bool protection_still_good = false; bool is_zero = false; + + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (MI_SECURE!=0) { From 2bb058bd25258c2e7a9fb2c1a64400ec780c2912 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 14:44:32 -0800 Subject: [PATCH 035/179] remove cache_reset parameter --- include/mimalloc.h | 1 - src/options.c | 1 - src/segment.c | 6 +----- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 4c542ee0..6df889a4 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -271,7 +271,6 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, - mi_option_cache_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_os_tag, diff --git a/src/options.c b/src/options.c index 694b916b..1231e1c9 100644 --- a/src/options.c +++ b/src/options.c @@ -66,7 +66,6 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free - { 0, UNINIT, MI_OPTION(cache_reset) }, // reset segment cache on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index eb5a0390..ef24c660 100644 --- a/src/segment.c +++ b/src/segment.c @@ -280,9 +280,6 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); - } segment->next = tld->cache; tld->cache = segment; tld->cache_count++; @@ -351,8 +348,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); segment->mem_is_committed = true; } - if (!segment->mem_is_fixed && - (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { + if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { bool reset_zero = false; _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); if (reset_zero) is_zero = true; From db3f1c4bfadcb7007357fd61d7dc24369ae8fe31 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:26:11 -0800 Subject: [PATCH 036/179] add commit info to arenas --- src/arena.c | 66 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1b6cf4a4..02890bd6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -33,6 +33,7 @@ of 256MiB in 
practice. #include "bitmap.inc.c" // atomic bitmap + // os.c void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -40,6 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- @@ -56,13 +58,15 @@ int _mi_os_numa_node_count(void); typedef struct mi_arena_s { uint8_t* start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) - size_t field_count; // number of bitmap fields + size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? + bool is_committed; // is the memory committed bool is_large; // large OS page allocated volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t blocks_map[1]; // bitmap of in-use blocks + mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -104,7 +108,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -118,31 +122,46 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid) + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; - if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) { - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *memid = mi_memid_create(arena_index, bitmap_index); - *commit = true; // TODO: support commit on demand? - *large = arena->is_large; - return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE)); + if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; + + // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
+ void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + *memid = mi_memid_create(arena_index, bitmap_index); + *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *large = arena->is_large; + if (arena->is_committed) { + // always committed + *commit = true; } - return NULL; + else if (commit) { + // ensure commit now + bool any_zero; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); + if (commit_zero) *is_zero = true; + } + } + else { + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } + return p; } void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && @@ -160,7 +179,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -172,7 +191,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -182,9 +201,6 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, // finally, fall back to the OS *is_zero = true; *memid = MI_MEMID_OS; - if (*large) { - *large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed - } return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); } @@ -223,7 +239,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { return; } const size_t blocks = mi_block_count_of_size(size); - bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx); + bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); return; @@ -283,15 +299,17 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; + arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_map[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } mi_arena_add(arena); From 5e6754f3f7905485ca74546ab082f4c3bc5404fd Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 15:45:31 -0800 Subject: [PATCH 037/179] track commit status per block in a region --- src/memory.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/memory.c b/src/memory.c index 208b9b7e..8299bbc2 100644 --- a/src/memory.c +++ b/src/memory.c @@ -59,7 +59,7 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 40KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else @@ -94,8 +94,9 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b typedef struct mem_region_s { volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) - mi_bitmap_field_t in_use; - mi_bitmap_field_t dirty; + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) size_t arena_memid; // if allocated from a (huge page) arena } mem_region_t; @@ -165,20 +166,20 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? 
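
(Aside: the region `info` field above packs the start pointer of the memory area together with its `is_large` and `is_committed` flags, which is why `mi_region_info_read` returns both. A plausible encoding uses the low bits of the aligned start address; the exact layout is defined by `mi_region_info_create`/`mi_region_info_read` in memory.c and may differ in detail:)

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t region_info_t;

// the start address is strongly aligned, so the two lowest bits are free to hold flags
static region_info_t region_info_create(void* start, bool is_large, bool is_committed) {
  return ((uintptr_t)start | ((uintptr_t)(is_large ? 1 : 0) << 1) | (is_committed ? 1 : 0));
}

static void* region_info_read(region_info_t info, bool* is_large, bool* is_committed) {
  if (is_large != NULL)     *is_large     = ((info & 0x02) != 0);
  if (is_committed != NULL) *is_committed = ((info & 0x01) != 0);
  return (void*)(info & ~(uintptr_t)0x03);   // mask the flag bits off to recover the pointer
}

(Packing the flags into the pointer lets a region publish its start address and status in a single atomic write of `info`, which matters because other threads read it concurrently when scanning for a suitable region.)
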
if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; + bool region_large = (commit && allow_large); + bool is_zero = false; size_t arena_memid = 0; void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); - + // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); if (idx >= MI_REGION_MAX) { @@ -191,8 +192,13 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->numa_node = _mi_os_numa_node(tld) + 1; r->arena_memid = arena_memid; + mi_atomic_write(&r->in_use, 0); + mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); + mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + + // and share it mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others *region = r; return true; @@ -269,20 +275,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(!(region_is_large && !*is_large)); mi_assert_internal(start != NULL); - bool any_zero = false; - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, &any_zero); - if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed + *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); *is_large = region_is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed? - // ensure commit - _mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats); + if (region_is_committed) { + // always committed + *commit = true; + } + else if (*commit) { + // ensure commit + bool any_zero; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); + if (any_zero) { + bool commit_zero; + _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); + if (commit_zero) *is_zero = true; + } } else { - *commit = region_is_committed || !any_zero; - } - + // no need to commit, but check if already fully committed + *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); + } // and return the allocation mi_assert_internal(p != NULL); @@ -374,7 +388,8 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { mi_option_is_enabled(mi_option_segment_reset) && mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead { - _mi_os_reset(p, size, tld->stats); + // note: don't use `_mi_mem_reset` as it is shared with other threads! 
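
(Aside: the per-block commit tracking added here for regions follows the same pattern as the arena change in the previous commit: claim the `commit` bits for the requested blocks first, and only call into the OS when at least one of them was not yet committed. A minimal sketch; the `os_commit` callback stands in for `_mi_os_commit` and the names are illustrative:)

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Ensure `count` blocks starting at bit `bitidx` of one bitmap field are committed.
// A set bit means "this block is already committed".
static bool commit_blocks(_Atomic(uintptr_t)* committed, size_t bitidx, size_t count,
                          uint8_t* area_start, size_t block_size,
                          bool (*os_commit)(void* p, size_t size)) {
  const size_t field_bits = 8*sizeof(uintptr_t);
  const uintptr_t mask = (count >= field_bits ? ~(uintptr_t)0
                                              : ((((uintptr_t)1 << count) - 1) << bitidx));
  const uintptr_t prev = atomic_fetch_or(committed, mask);    // claim the commit bits
  if ((prev & mask) != mask) {
    // at least one block was not yet committed: commit the whole claimed range
    return os_commit(area_start + bitidx*block_size, count*block_size);
  }
  return true;   // every block was committed already; nothing to do
}

(Re-committing blocks that happen to be committed already is harmless, which is why a single "was any bit still zero" test over the whole range is enough; the `any_zero` flag used above serves the same purpose.)
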
+ _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset } if (!is_committed) { // adjust commit statistics as we commit again when re-using the same slot From a0958b2da696a308f8c200f45f08bf1ab3e5f14b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:06:16 -0800 Subject: [PATCH 038/179] enable more reset delay slots --- include/mimalloc-types.h | 13 ++++++-- src/init.c | 9 ++++-- src/memory.c | 70 ++++++++++++++++++++++++++-------------- src/options.c | 2 +- src/segment.c | 4 ++- 5 files changed, 66 insertions(+), 32 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 2651fc85..0ce91339 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ typedef int64_t mi_msecs_t; +#define MI_RESET_DELAY_SLOTS (256) + typedef struct mi_delay_slot_s { mi_msecs_t expire; uint8_t* addr; size_t size; } mi_delay_slot_t; -#define MI_RESET_DELAY_SLOTS (128) +typedef struct mi_delay_slots_s { + size_t capacity; // always `MI_RESET_DELAY_SLOTS` + size_t count; // current slots used (`<= capacity`) + mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; +} mi_delay_slots_t; + // ------------------------------------------------------ // Thread Local data @@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats - mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; + mi_delay_slots_t* reset_delay; // delay slots for OS reset operations + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/src/init.c b/src/init.c index d5ec03c2..c9700cd5 100644 --- a/src/init.c +++ b/src/init.c @@ -100,8 +100,8 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, tld_main_stats, {{0,NULL,0}} }, // os - { MI_STATS_NULL } // stats + { 0, NULL, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { @@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_delay_slots_t reset_delay; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -211,6 +212,7 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; + mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -221,6 +223,9 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; + tld->os.reset_delay = reset_delay; + memset(reset_delay, 0, sizeof(*reset_delay)); + reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_default = heap; } return false; diff --git a/src/memory.c b/src/memory.c index 8299bbc2..f3052d6b 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); // local -static bool 
mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); +static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); // Constants @@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, Try to claim blocks in suitable regions -----------------------------------------------------------------------------*/ -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) { +static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); if (info==0) return false; @@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo } -static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); @@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; - if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { + if (mi_region_is_suitable(r, numa_node, allow_large)) { if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; @@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mem_region_t* region; mi_bitmap_index_t bit_idx; // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { + if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay, p, size); size_t arena_memid = 0; mi_bitmap_index_t bit_idx; @@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); + mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) { typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); -static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, +static void mi_delay_insert(mi_delay_slots_t* ds, mi_msecs_t delay, uint8_t* addr, size_t size, mi_delay_resolve_fun* resolve, void* arg) { - if (delay==0) { + if (ds == NULL || delay==0 || addr==NULL || size==0) { resolve(addr, size, arg); return; } mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = slots; + mi_delay_slot_t* oldest = &ds->slots[0]; // 
walk through all slots, resolving expired ones. // remember the oldest slot to insert the new entry in. - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->expire == 0) { // empty slot @@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, } else if (oldest->expire > slot->expire) { oldest = slot; + newcount = i+1; + } + else { + newcount = i+1; } } + ds->count = newcount; if (delay>0) { - // not yet registered, use the oldest slot - if (oldest->expire > 0) { + // not yet registered, use the oldest slot (or a new one if there is space) + if (ds->count < ds->capacity) { + oldest = &ds->slots[ds->count]; + ds->count++; + } + else if (oldest->expire > 0) { resolve(oldest->addr, oldest->size, arg); // evict if not empty } + mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); oldest->expire = now + delay; oldest->addr = addr; oldest->size = size; } } -static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) +static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) { + if (ds == NULL || p==NULL || size==0) return false; + uint8_t* addr = (uint8_t*)p; bool done = false; - // walk through all slots - for (size_t i = 0; i < count; i++) { - mi_delay_slot_t* slot = &slots[i]; + size_t newcount = 0; + + // walk through all valid slots + for (size_t i = 0; i < ds->count; i++) { + mi_delay_slot_t* slot = &ds->slots[i]; if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { // earlier slot encompasses the area; remove it slot->expire = 0; @@ -510,12 +525,17 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_ slot->expire = 0; } else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap, remove slot - mi_assert_internal(false); + (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { + // partial overlap + // can happen with a large object spanning onto some partial end block + // mi_assert_internal(false); slot->expire = 0; } + else { + newcount = i + 1; + } } + ds->count = newcount; return done; } @@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) { } bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), + mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), (uint8_t*)p, size, &mi_resolve_reset, tld); return true; } bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { + if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { return _mi_os_unreset(p, size, is_zero, tld->stats); } return true; @@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { -----------------------------------------------------------------------------*/ bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); + 
mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 81ffe88b..ff96c95b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 49dab6ba..549dd339 100644 --- a/src/segment.c +++ b/src/segment.c @@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) + // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets + { size_t psize; uint8_t* start = _mi_page_start(segment, page, &psize); page->is_reset = true; From 165ee4584597aebdb1a45fcd4e8b3904b6f7d396 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 11 Nov 2019 17:31:48 -0800 Subject: [PATCH 039/179] initialize delay slots for the main thread --- src/init.c | 4 +++- src/options.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/init.c b/src/init.c index c9700cd5..5967b4b9 100644 --- a/src/init.c +++ b/src/init.c @@ -96,11 +96,13 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) +static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; + static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, NULL, tld_main_stats }, // os + { 0, &tld_reset_delay_main, tld_main_stats }, // os { MI_STATS_NULL } // stats }; diff --git a/src/options.c b/src/options.c index ff96c95b..81ffe88b 100644 --- a/src/options.c +++ b/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From ef179a63770d8e17f105303a08ddfdd57085b936 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 
Nov 2019 10:16:59 -0800 Subject: [PATCH 040/179] avoid allocation at numa node detection on linux --- include/mimalloc-internal.h | 37 +++++++++++++++------ src/os.c | 65 +++++++++++++++++-------------------- 2 files changed, 56 insertions(+), 46 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6bfabe27..668a7bd3 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -17,18 +17,18 @@ terms of the MIT license. A copy of the license can be found in the file #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else -#define mi_trace_message(...) +#define mi_trace_message(...) #endif #if defined(_MSC_VER) #define mi_decl_noinline __declspec(noinline) -#define mi_attr_noreturn +#define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) #define mi_decl_noinline __attribute__((noinline)) #define mi_attr_noreturn __attribute__((noreturn)) #else #define mi_decl_noinline -#define mi_attr_noreturn +#define mi_attr_noreturn #endif @@ -56,8 +56,6 @@ void _mi_os_init(void); // called fro void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); -int _mi_os_numa_node(mi_os_tld_t* tld); -int _mi_os_numa_node_count(void); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); @@ -146,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page); Inlined definitions ----------------------------------------------------------- */ #define UNUSED(x) (void)(x) -#if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#if (MI_DEBUG>0) +#define UNUSED_RELEASE(x) #else #define UNUSED_RELEASE(x) UNUSED(x) #endif @@ -398,7 +396,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* bl #endif } -static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { +static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST block->next = (mi_encoded_t)next ^ cookie; #else @@ -411,12 +409,12 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* #ifdef MI_ENCODE_FREELIST mi_block_t* next = mi_block_nextx(page->cookie,block); // check for free list corruption: is `next` at least in our segment range? - // TODO: it is better to check if it is actually inside our page but that is more expensive + // TODO: it is better to check if it is actually inside our page but that is more expensive // to calculate. Perhaps with a relative free list this becomes feasible? 
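
(Aside: the encoded free-list helpers shown above XOR each `next` link with a per-heap cookie, so an overwritten entry decodes to an implausible address; the segment check that follows below is what rejects such entries. A small self-contained illustration with simplified names, guarded in the real code by `MI_ENCODE_FREELIST`:)

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct block_s {
  uintptr_t next;                                  // encoded link: (uintptr_t)next_block ^ cookie
} block_t;

static block_t* block_next(uintptr_t cookie, const block_t* b) {
  return (block_t*)(b->next ^ cookie);             // decode
}

static void block_set_next(uintptr_t cookie, block_t* b, const block_t* next) {
  b->next = (uintptr_t)next ^ cookie;              // encode
}

int main(void) {
  block_t a, b;
  const uintptr_t cookie = (uintptr_t)0xa5a5a5a5u; // a per-heap random value in practice
  block_set_next(cookie, &a, &b);
  printf("decodes back to b: %d\n", block_next(cookie, &a) == &b);  // prints 1
  // a stray overwrite of `a.next` would decode to an address far outside the
  // current segment, which the range check below can detect and null out
  return 0;
}
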
if (next!=NULL && !mi_is_in_same_segment(block, next)) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; - } + } return next; #else UNUSED(page); @@ -433,6 +431,25 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +int _mi_os_numa_node_count_get(void); + +extern int _mi_numa_node_count; +static inline int _mi_os_numa_node(mi_os_tld_t* tld) { + if (mi_likely(_mi_numa_node_count == 1)) return 0; + else return _mi_os_numa_node_get(tld); +} +static inline int _mi_os_numa_node_count(void) { + if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; + else return _mi_os_numa_node_count_get(); +} + + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/src/os.c b/src/os.c index 5229381b..d6878927 100644 --- a/src/os.c +++ b/src/os.c @@ -786,9 +786,9 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; + MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { @@ -818,7 +818,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation if (pVirtualAlloc2 != NULL && numa_node >= 0) { params[0].Type = MemExtendedParameterNumaNode; - params[0].ULong = (unsigned)numa_node; + params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } #endif @@ -838,7 +838,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we + // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? // see: long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); @@ -857,7 +857,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #endif -#if (MI_INTPTR_SIZE >= 8) +#if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages static _Atomic(uintptr_t) mi_huge_start; // = 0 @@ -900,7 +900,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse size_t size = 0; uint8_t* start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems - + // Allocate one page at the time but try to place them contiguously // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. 
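
(Aside: a condensed sketch of that strategy: keep the reservation contiguous by asking for each 1GiB page at the address right after the previous one, and stop early on failure or when the time budget is exceeded. The `alloc_at` and `now` callbacks are placeholders for the OS-specific allocation and clock calls:)

#include <stddef.h>
#include <stdint.h>

#define HUGE_PAGE_SIZE ((size_t)1 << 30)           // 1 GiB per huge OS page

typedef int64_t msecs_t;

static void* reserve_huge_pages(uint8_t* start, size_t pages, msecs_t max_msecs,
                                void* (*alloc_at)(void* addr),  // allocate one huge page at `addr`, or fail
                                msecs_t (*now)(void),           // monotonic clock in milliseconds
                                size_t* pages_reserved) {
  const msecs_t start_t = now();
  size_t page = 0;
  while (page < pages) {
    uint8_t* addr = start + page*HUGE_PAGE_SIZE;   // the next address that keeps the range contiguous
    if (alloc_at(addr) != addr) break;             // the OS could not give us this page: keep what we have
    page++;
    if (max_msecs > 0 && (now() - start_t) > max_msecs) break;   // don't stall program start-up
  }
  *pages_reserved = page;
  return (page == 0 ? NULL : start);               // a (possibly partial) contiguous reservation
}

(A timeout therefore yields a partial reservation rather than a failure, which is what the `pages_reserved` out-parameter of the reservation API reports back.)
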
@@ -920,11 +920,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } break; } - + // success, record it _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); - + // check for timeout if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); @@ -958,7 +958,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { } /* ---------------------------------------------------------------------------- -Support NUMA aware allocation +Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 static int mi_os_numa_nodex() { @@ -975,9 +975,8 @@ static int mi_os_numa_node_countx(void) { return (int)(numa_max + 1); } #elif defined(__linux__) -#include -#include -#include +#include // getcpu +#include // access static int mi_os_numa_nodex(void) { #ifdef SYS_getcpu @@ -990,22 +989,15 @@ static int mi_os_numa_nodex(void) { return 0; #endif } - static int mi_os_numa_node_countx(void) { - DIR* d = opendir("/sys/devices/system/node"); - if (d==NULL) return 1; - - struct dirent* de; - int max_node_num = 0; - while ((de = readdir(d)) != NULL) { - int node_num; - if (strncmp(de->d_name, "node", 4) == 0) { - node_num = (int)strtol(de->d_name+4, NULL, 0); - if (max_node_num < node_num) max_node_num = node_num; - } + char buf[128]; + int max_node = mi_option_get(mi_option_max_numa_node); + int node = 0; + for(node = 0; node < max_node; node++) { + snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + if (access(buf,R_OK) != 0) break; } - closedir(d); - return (max_node_num + 1); + return (node+1); } #else static int mi_os_numa_nodex(void) { @@ -1016,29 +1008,30 @@ static int mi_os_numa_node_countx(void) { } #endif -int _mi_os_numa_node_count(void) { - static int numa_node_count = 0; // cache the node count - if (mi_unlikely(numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); +int _mi_numa_node_count = 0; // cache the node count + +int _mi_os_numa_node_count_get(void) { + if (mi_unlikely(_mi_numa_node_count <= 0)) { + int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; - numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); + _mi_numa_node_count = ncount; + _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); } - mi_assert_internal(numa_node_count >= 1); - return numa_node_count; + mi_assert_internal(_mi_numa_node_count >= 1); + return _mi_numa_node_count; } -int _mi_os_numa_node(mi_os_tld_t* tld) { +int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 int numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; + if (numa_node < 0) numa_node = 0; return numa_node; } From af746ca4c1682e29dd42e8c0e6fa6db6aa04b200 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:17:39 -0800 Subject: [PATCH 041/179] inline bitmap_mask --- src/bitmap.inc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bitmap.inc.c 
b/src/bitmap.inc.c index 3847e712..81f87a79 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -8,11 +8,11 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- This file is meant to be included in other files for efficiency. It implements a bitmap that can set/reset sequences of bits atomically -and is used to concurrently claim memory ranges. +and is used to concurrently claim memory ranges. A bitmap is an array of fields where each field is a machine word (`uintptr_t`) -A current limitation is that the bit sequences cannot cross fields +A current limitation is that the bit sequences cannot cross fields and that the sequence must be smaller or equal to the bits in a field. ---------------------------------------------------------------------------- */ #pragma once @@ -59,7 +59,7 @@ static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { // The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { +static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; return ((((uintptr_t)1 << count) - 1) << bitidx); @@ -104,10 +104,10 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ -// Try to atomically claim a sequence of `count` bits in a single +// Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) -{ +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; uintptr_t map = mi_atomic_read(field); @@ -136,7 +136,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con continue; } else { - // success, we claimed the bits! + // success, we claimed the bits! 
*bitmap_idx = mi_bitmap_index_create(idx, bitidx); return true; } @@ -205,4 +205,4 @@ static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); } -#endif \ No newline at end of file +#endif From 867d78f877474c7f36fd19bc2ea62918f117f068 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:19:52 -0800 Subject: [PATCH 042/179] reserve huge OS pages earlier on at process_init --- src/init.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/init.c b/src/init.c index 5967b4b9..473e9a32 100644 --- a/src/init.c +++ b/src/init.c @@ -19,7 +19,7 @@ const mi_page_t _mi_page_empty = { 0, #endif 0, // used - NULL, + NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) @@ -246,7 +246,7 @@ static bool _mi_heap_done(void) { // switch to backing heap and free it heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); @@ -394,7 +394,7 @@ bool mi_is_redirected() mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) #ifdef __cplusplus extern "C" { #endif @@ -440,11 +440,6 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,msg); } - - if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); - } } // Initialize the process; called by thread_init or the process loader @@ -471,6 +466,11 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, pages*500); + } } // Called when the process is done (through `at_exit`) @@ -497,7 +497,7 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done + // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { UNUSED(reserved); UNUSED(inst); From d4f54dcf3049bd958ee262cbd9b3b0c7134d59ed Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 12 Nov 2019 10:37:15 -0800 Subject: [PATCH 043/179] remove numaif dependency on linux --- CMakeLists.txt | 11 ----------- src/os.c | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18bdea5a..a2258128 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,6 @@ option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanis option(MI_BUILD_TESTS "Build test executables" ON) include("cmake/mimalloc-config-version.cmake") -include("CheckIncludeFile") set(mi_install_dir "lib/mimalloc-${mi_version}") @@ -98,16 +97,6 @@ if(MI_USE_CXX MATCHES "ON") set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) endif() -CHECK_INCLUDE_FILE("numaif.h" 
MI_HAVE_NUMA_H) -if(MI_HAVE_NUMA_H) - list(APPEND mi_defines MI_HAS_NUMA) - list(APPEND mi_libraries numa) -else() - if (NOT(WIN32)) - message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)") - endif() -endif() - # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) diff --git a/src/os.c b/src/os.c index d6878927..7af7363b 100644 --- a/src/os.c +++ b/src/os.c @@ -827,28 +827,35 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) -#ifdef MI_HAS_NUMA -#include // mbind, and use -lnuma +#include +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif +#if defined(SYS_mbind) +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags); + return 0; +} #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - #ifdef MI_HAS_NUMA if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
// see: - long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } } - #else - UNUSED(numa_node); - #endif return p; } #else From bdb82748191ac5dbc436f0f62dcbebfd3df95157 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Tue, 12 Nov 2019 12:04:43 -0800 Subject: [PATCH 044/179] change max_numa_node to max_numa_nodes option --- include/mimalloc.h | 2 +- src/options.c | 2 +- src/os.c | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 67b17c73..8d029135 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -275,7 +275,7 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_os_tag, - mi_option_max_numa_node, + mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/options.c b/src/options.c index 81ffe88b..bbea4e67 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_node) }, // maximum allowed numa node + { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; diff --git a/src/os.c b/src/os.c index 7af7363b..93fb8b31 100644 --- a/src/os.c +++ b/src/os.c @@ -998,9 +998,10 @@ static int mi_os_numa_nodex(void) { } static int mi_os_numa_node_countx(void) { char buf[128]; - int max_node = mi_option_get(mi_option_max_numa_node); + int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) int node = 0; - for(node = 0; node < max_node; node++) { + for(node = 0; node < max_nodes; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); if (access(buf,R_OK) != 0) break; } @@ -1022,7 +1023,7 @@ int _mi_os_numa_node_count_get(void) { int ncount = mi_os_numa_node_countx(); int ncount0 = ncount; // never more than max numa node and at least 1 - int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); + int nmax = (int)mi_option_get(mi_option_max_numa_nodes); if (ncount > nmax) ncount = nmax; if (ncount <= 0) ncount = 1; _mi_numa_node_count = ncount; From d01ed42bcb755ed6c1b52bfd8a306821da098dd5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 13 Nov 2019 13:35:50 -0800 Subject: [PATCH 045/179] replace max_numa_nodes by use_numa_nodes (to help with wrong detection of numa nodes on WSL for example) --- include/mimalloc-internal.h | 8 +++--- include/mimalloc.h | 4 +-- src/arena.c | 15 +++++------ src/init.c | 2 +- src/options.c | 4 +-- src/os.c | 54 +++++++++++++++++-------------------- 6 files changed, 40 insertions(+), 47 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 668a7bd3..77045a99 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -436,15 +436,15 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- -int _mi_os_numa_node_get(mi_os_tld_t* tld); -int _mi_os_numa_node_count_get(void); +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); -extern int _mi_numa_node_count; +extern size_t _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { if (mi_likely(_mi_numa_node_count == 1)) return 0; else return _mi_os_numa_node_get(tld); } -static inline int _mi_os_numa_node_count(void) { +static inline size_t _mi_os_numa_node_count(void) { if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count; else return _mi_os_numa_node_count_get(); } diff --git a/include/mimalloc.h b/include/mimalloc.h index 8d029135..3c942849 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -230,7 +230,7 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; // deprecated @@ -274,8 +274,8 @@ typedef enum mi_option_e { mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, + mi_option_use_numa_nodes, mi_option_os_tag, - mi_option_max_numa_nodes, mi_option_max_errors, _mi_option_last } mi_option_t; diff --git a/src/arena.c b/src/arena.c index 02890bd6..46741208 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,7 +42,6 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -int _mi_os_numa_node_count(void); /* ----------------------------------------------------------- Arena allocation @@ -317,22 +316,22 @@ int mi_reserve_huge_os_pages_at(size_t pages, int 
numa_node, size_t timeout_msec } -// reserve huge pages evenly among all numa nodes. -int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept { +// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { if (pages == 0) return 0; // pages per numa node - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; // reserve evenly among numa nodes - for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if ((size_t)numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); + if (numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; @@ -349,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/init.c b/src/init.c index 473e9a32..72543b95 100644 --- a/src/init.c +++ b/src/init.c @@ -469,7 +469,7 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, pages*500); + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); } } diff --git a/src/options.c b/src/options.c index bbea4e67..180f6a75 100644 --- a/src/options.c +++ b/src/options.c @@ -69,9 +69,9 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 256, UNINIT, MI_OPTION(max_numa_nodes) }, // use at most N numa nodes - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/os.c b/src/os.c index 93fb8b31..2415a40d 100644 --- a/src/os.c +++ b/src/os.c @@ -968,66 +968,61 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef WIN32 -static int mi_os_numa_nodex() { +static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); GetNumaProcessorNodeEx(&pnum,&numa_node); - return (int)numa_node; + return numa_node; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); - return (int)(numa_max + 1); + return (numa_max + 1); } #elif defined(__linux__) #include // getcpu #include // access -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { #ifdef SYS_getcpu - unsigned node = 0; - unsigned ncpu = 0; - int err = syscall(SYS_getcpu, &ncpu, &node, NULL); + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); if (err != 0) return 0; - return (int)node; + return node; #else return 0; #endif } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { char buf[128]; - int max_nodes = mi_option_get(mi_option_max_numa_nodes); // set to 0 to disable detection (and NUMA awareness) - int node = 0; - for(node = 0; node < max_nodes; node++) { + unsigned node = 0; + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1); + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); if (access(buf,R_OK) != 0) break; } return (node+1); } #else -static int mi_os_numa_nodex(void) { +static size_t mi_os_numa_nodex(void) { return 0; } -static int mi_os_numa_node_countx(void) { +static size_t mi_os_numa_node_countx(void) { return 1; } #endif -int _mi_numa_node_count = 0; // cache the node count +size_t _mi_numa_node_count = 0; // cache the node count -int _mi_os_numa_node_count_get(void) { +size_t _mi_os_numa_node_count_get(void) { if (mi_unlikely(_mi_numa_node_count <= 0)) { - int ncount = mi_os_numa_node_countx(); - int ncount0 = ncount; - // never more than max numa node and at least 1 - int nmax = (int)mi_option_get(mi_option_max_numa_nodes); - if (ncount > nmax) ncount = nmax; - if (ncount <= 0) ncount = 1; - _mi_numa_node_count = ncount; - _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0); + long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? + if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically + _mi_numa_node_count = (size_t)(ncount <= 0 ? 
1 : ncount); + _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count); } mi_assert_internal(_mi_numa_node_count >= 1); return _mi_numa_node_count; @@ -1035,11 +1030,10 @@ int _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); - int numa_count = _mi_os_numa_node_count(); + size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - int numa_node = mi_os_numa_nodex(); + size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - if (numa_node < 0) numa_node = 0; - return numa_node; + return (int)numa_node; } From 30e2c54adba9f1d2ef32e35e4e6c4b80e5732c26 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:13:02 -0800 Subject: [PATCH 046/179] remove delayed reset option (for now) --- include/mimalloc.h | 2 +- src/memory.c | 139 ++------------------------ src/options.c | 4 +- src/os.c | 237 +++++++++++++++++++++++++-------------------- 4 files changed, 142 insertions(+), 240 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3c942849..a59b9cf7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,7 +273,7 @@ typedef enum mi_option_e { mi_option_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, - mi_option_reset_delay, + mi_option_reset_decommits, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/memory.c b/src/memory.c index f3052d6b..b0bcf7a0 100644 --- a/src/memory.c +++ b/src/memory.c @@ -53,9 +53,6 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -// local -static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size); - // Constants #if (MI_INTPTR_SIZE==8) @@ -354,8 +351,6 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { if (p==NULL) return; if (size==0) return; - mi_delay_remove(tld->reset_delay, p, size); - size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; @@ -424,7 +419,6 @@ void _mi_mem_collect(mi_os_tld_t* tld) { bool is_eager_committed; void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); } // and release @@ -434,142 +428,23 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } } -/* ---------------------------------------------------------------------------- - Delay slots ------------------------------------------------------------------------------*/ - -typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); - -static void mi_delay_insert(mi_delay_slots_t* ds, - mi_msecs_t delay, uint8_t* addr, size_t size, - mi_delay_resolve_fun* resolve, void* arg) -{ - if (ds == NULL || delay==0 || addr==NULL || size==0) { - resolve(addr, size, arg); - return; - } - - mi_msecs_t now = _mi_clock_now(); - mi_delay_slot_t* oldest = &ds->slots[0]; - // walk through all slots, resolving expired ones. - // remember the oldest slot to insert the new entry in. 
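
(Aside: for context, the delay-slot mechanism removed by this commit postponed `_mi_os_reset` calls for a short interval so that memory freed and promptly reused never reached the OS at all. A simplified, single-threaded sketch of the idea; names and structure are illustrative, not the removed code verbatim:)

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef int64_t msecs_t;

typedef struct delay_slot_s {
  msecs_t  expire;            // 0 = empty, otherwise the time at which the reset should happen
  uint8_t* addr;
  size_t   size;
} delay_slot_t;

#define DELAY_SLOTS 8

// record a pending reset instead of performing it immediately
static void delay_reset(delay_slot_t* slots, msecs_t now, msecs_t delay,
                        uint8_t* addr, size_t size,
                        void (*do_reset)(void* p, size_t size)) {
  delay_slot_t* oldest = &slots[0];
  for (size_t i = 0; i < DELAY_SLOTS; i++) {
    if (slots[i].expire != 0 && slots[i].expire < now) {       // expired: flush it now
      do_reset(slots[i].addr, slots[i].size);
      slots[i].expire = 0;
    }
    if (slots[i].expire < oldest->expire) oldest = &slots[i];  // prefer empty, then oldest, slots
  }
  if (oldest->expire != 0) do_reset(oldest->addr, oldest->size);  // evict if still occupied
  oldest->expire = now + delay;
  oldest->addr = addr;
  oldest->size = size;
}

// cancel pending resets that cover memory which is about to be reused
static bool delay_cancel(delay_slot_t* slots, uint8_t* addr, size_t size) {
  bool cancelled = false;
  for (size_t i = 0; i < DELAY_SLOTS; i++) {
    if (slots[i].expire != 0 &&
        slots[i].addr <= addr && addr + size <= slots[i].addr + slots[i].size) {
      slots[i].expire = 0;    // the slot encompassed the range: drop it, no OS reset needed
      cancelled = true;
    }
  }
  return cancelled;
}

(The removed implementation also handled partially overlapping ranges and kept the slots in thread-local storage; the sketch only shows the expire/cancel idea.)
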
- size_t newcount = 0; - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - - if (slot->expire == 0) { - // empty slot - oldest = slot; - } - // TODO: should we handle overlapping areas too? - else if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses new area, increase expiration - slot->expire = now + delay; - delay = 0; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, overwrite - slot->expire = now + delay; - slot->addr = addr; - slot->size = size; - delay = 0; - } - else if (slot->expire < now) { - // expired slot, resolve now - slot->expire = 0; - resolve(slot->addr, slot->size, arg); - } - else if (oldest->expire > slot->expire) { - oldest = slot; - newcount = i+1; - } - else { - newcount = i+1; - } - } - ds->count = newcount; - if (delay>0) { - // not yet registered, use the oldest slot (or a new one if there is space) - if (ds->count < ds->capacity) { - oldest = &ds->slots[ds->count]; - ds->count++; - } - else if (oldest->expire > 0) { - resolve(oldest->addr, oldest->size, arg); // evict if not empty - } - mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count); - oldest->expire = now + delay; - oldest->addr = addr; - oldest->size = size; - } -} - -static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size) -{ - if (ds == NULL || p==NULL || size==0) return false; - - uint8_t* addr = (uint8_t*)p; - bool done = false; - size_t newcount = 0; - - // walk through all valid slots - for (size_t i = 0; i < ds->count; i++) { - mi_delay_slot_t* slot = &ds->slots[i]; - if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { - // earlier slot encompasses the area; remove it - slot->expire = 0; - done = true; - } - else if (addr <= slot->addr && addr + size >= slot->addr + slot->size) { - // new one encompasses old slot, remove it - slot->expire = 0; - } - else if ((addr <= slot->addr && addr + size > slot->addr) || - (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { - // partial overlap - // can happen with a large object spanning onto some partial end block - // mi_assert_internal(false); - slot->expire = 0; - } - else { - newcount = i + 1; - } - } - ds->count = newcount; - return done; -} - -static void mi_resolve_reset(void* p, size_t size, void* vtld) { - mi_os_tld_t* tld = (mi_os_tld_t*)vtld; - _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay), - (uint8_t*)p, size, &mi_resolve_reset, tld); - return true; -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } - return true; -} - - /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ +bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { + return _mi_os_reset(p, size, tld->stats); +} + +bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { + return _mi_os_unreset(p, size, is_zero, tld->stats); +} bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - mi_delay_remove(tld->reset_delay,p, size); return _mi_os_commit(p, size, is_zero, tld->stats); } bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) 
{ - mi_delay_remove(tld->reset_delay, p, size); return _mi_os_decommit(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 180f6a75..8c4c1707 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,10 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 2415a40d..02683a02 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; + if (size < 512 * KiB) align_size = _mi_os_page_size(); + else if (size < 2 * MiB) align_size = 64 * KiB; + else if (size < 8 * MiB) align_size = 256 * KiB; + else if (size < 32 * MiB) align_size = 1 * MiB; + else align_size = 4 * MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. 
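Aside on the hunk above: `_mi_os_good_alloc_size` rounds a request up to a granularity chosen by size class so that at most 12.5% is wasted. A minimal standalone sketch of that rule (editorial illustration, not part of the patch; it assumes a 4KiB OS page size instead of querying `_mi_os_page_size()`):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define KiB ((size_t)1024)
#define MiB (KiB*KiB)

static size_t align_up(size_t sz, size_t alignment) {
  return ((sz + alignment - 1) / alignment) * alignment;
}

static size_t good_alloc_size(size_t size) {
  size_t align_size;
  if (size < 512*KiB)      align_size = 4*KiB;      // stand-in for _mi_os_page_size()
  else if (size < 2*MiB)   align_size = 64*KiB;
  else if (size < 8*MiB)   align_size = 256*KiB;
  else if (size < 32*MiB)  align_size = 1*MiB;
  else                     align_size = 4*MiB;
  if (size >= (SIZE_MAX - align_size)) return size; // possible overflow
  return align_up(size, align_size);
}

int main(void) {
  // 600KiB falls in the 64KiB class and rounds to 640KiB (~6.7% waste, under 12.5%)
  printf("%zu\n", good_alloc_size(600*KiB));        // 655360
  return 0;
}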
#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok!=0); + return (ok != 0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2*MiB; + large_os_page_size = 2 * MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). 
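Aside on the large-page path in this hunk: after a failed large-page allocation the shared counter `large_page_try_ok` is set so the next N allocations skip the expensive `VirtualAlloc` large-page attempt. A sketch of that backoff pattern in plain C11 atomics (editorial; the way the counter is counted down is a plausible reading of the elided code, not a verbatim copy, and the allocation functions are stubs):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>

static _Atomic size_t large_page_try_ok;  // = 0

// stubs so the sketch is self-contained: pretend large-page allocation always fails
static void* try_alloc_large(size_t size) { (void)size; return NULL; }
static void* alloc_regular(size_t size)   { return malloc(size); }

static void* alloc_with_backoff(size_t size) {
  size_t try_ok = atomic_load(&large_page_try_ok);
  if (try_ok > 0) {
    // a recent large-page attempt failed: skip the expensive path and count down
    atomic_compare_exchange_weak(&large_page_try_ok, &try_ok, try_ok - 1);
  }
  else {
    void* p = try_alloc_large(size);
    if (p != NULL) return p;
    atomic_store(&large_page_try_ok, 10);  // don't try again for the next N allocations
  }
  return alloc_regular(size);
}

int main(void) {
  void* p = alloc_with_backoff(1 << 16);
  free(p);
  return 0;
}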
if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); + *is_large = ((flags & MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); - size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); + size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,47 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) +#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; // fall back to regular mmap } - #else +#else UNUSED(try_alignment); - #endif - if (p==NULL) { - p = mmap(addr,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; +#endif + if (p == NULL) { + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p == MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if !defined(MAP_ANONYMOUS) +#define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_NORESERVE) +#define MAP_NORESERVE 0 +#endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; - #if defined(MAP_ALIGNED) // BSD +#if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } } - #endif - #if defined(PROT_MAX) +#endif +#if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) +#endif +#if defined(VM_MAKE_TAG) +// macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); - #endif +#endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -332,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; - #ifdef MAP_ALIGNED_SUPER +#ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB +#endif +#ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB +#endif +#ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else - #endif +#endif { - #ifdef MAP_HUGE_2MB +#ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; - #endif +#endif } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB +#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif +#endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB +#ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } - #endif +#endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -375,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - #if defined(MADV_HUGEPAGE) +#if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
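Aside on the MADV_HUGEPAGE branch referenced in the hunk above: when explicit MAP_HUGETLB mappings are unavailable, the fallback relies on transparent huge pages. A Linux-only editorial sketch of issuing that advice (the kernel decides whether a suitably aligned range actually gets a huge page; this is not the patch code):

#include <sys/mman.h>
#include <stdio.h>

int main(void) {
  const size_t size = (size_t)2 << 20;   // 2 MiB
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
#ifdef MADV_HUGEPAGE
  // advisory only: the kernel may back a suitably aligned range with a 2MiB page
  if (madvise(p, size, MADV_HUGEPAGE) != 0) perror("madvise");
#endif
  munmap(p, size);
  return 0;
}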
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -387,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } - #endif +#endif } return p; } @@ -401,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; + if ((size % MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area - #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode +#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB - #endif + init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB +#endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint%try_alignment != 0) return NULL; + if (hint % try_alignment != 0) return NULL; return (void*)hint; } #else @@ -441,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif +#if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); +#elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); +#else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); +#endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -561,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); } @@ -613,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } - #if defined(_WIN32) +#if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -624,28 +627,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } - #elif defined(__wasi__) +#elif defined(__wasi__) // WebAssembly guests can't control memory protection - #else +#elif defined(MAP_FIXED) + if (!commit) { + // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + if (p != start) { err = errno; } + } + else { + // for commit, just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } +#else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } - #endif +#endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } +bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { + return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); +} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory @@ -657,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
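Aside on the commit/decommit hunk above: on Unix the patch switches decommit from `mprotect(PROT_NONE)` to overlaying the range with a fresh `MAP_FIXED` anonymous mapping, which also drops the commit charge. A standalone POSIX sketch of that decommit/recommit round-trip (editorial illustration, Linux flags assumed, not the patch code):

#include <sys/mman.h>
#include <string.h>
#include <stdio.h>

int main(void) {
  const size_t size = (size_t)1 << 20;    // 1 MiB (already page aligned)
  char* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { perror("mmap"); return 1; }
  memset(p, 0xAB, size);                  // touch it so the pages are committed

  // "decommit": overlay with a fresh PROT_NONE mapping; contents and commit charge are dropped
  void* q = mmap(p, size, PROT_NONE,
                 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (q != p) { perror("mmap MAP_FIXED"); return 1; }

  // "recommit": restore protection; the memory now reads back as zero
  if (mprotect(p, size, PROT_READ | PROT_WRITE) != 0) { perror("mprotect"); return 1; }
  printf("byte after recommit: %d\n", p[0]);   // 0
  munmap(p, size);
  return 0;
}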
- #if (MI_DEBUG>1) - if (MI_SECURE==0) { +#if (MI_DEBUG>1) + if (MI_SECURE == 0) { memset(start, 0, csize); // pretend it is eagerly reset } - #endif +#endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); - #if 1 +#if 1 if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } - #endif +#endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -704,12 +721,22 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { - return mi_os_resetx(addr, size, true, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_decommit(addr, size, stats); + } + else { + return mi_os_resetx(addr, size, true, stats); + } } bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); + if (mi_option_is_enabled(mi_option_reset_decommits)) { + return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + } + else { + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); + } } @@ -721,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -753,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); + mi_assert_internal(oldsize > newsize&& p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -781,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) +#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif +#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE +#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) +#endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -821,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - #endif +#endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -842,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); + mi_assert_internal(size % GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
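Aside: taken together, the aligned-hint logic earlier in this file and the huge-page claim just below lay out the 64-bit address space so the two areas never collide: 4MiB-aligned segment hints start at 4TiB (randomized by up to 64K x 4MiB = ~256GiB) and wrap after 30TiB, while huge OS pages are claimed from 32TiB upward (randomized by up to 1024 x 1GiB = ~1TiB). A small arithmetic sketch (editorial, 64-bit only):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void) {
  const uint64_t GiB = (uint64_t)1 << 30, TiB = (uint64_t)1 << 40;
  const uint64_t seg_base  = 4 * TiB;                            // segment hints start here
  const uint64_t seg_rand  = (uint64_t)0xFFFF * (4 * (1 << 20)); // up to 64K * 4MiB
  const uint64_t seg_wrap  = 30 * TiB;                           // hints wrap after 30TiB
  const uint64_t huge_base = 32 * TiB;                           // huge OS pages claimed from here
  const uint64_t huge_rand = (uint64_t)0x3FF * GiB;              // up to 1024 * 1GiB
  printf("segment hints : [%" PRIu64 " TiB .. %" PRIu64 " TiB), random start up to +%" PRIu64 " GiB\n",
         seg_base / TiB, seg_wrap / TiB, seg_rand / GiB);
  printf("huge OS pages : from %" PRIu64 " TiB, random start up to +%" PRIu64 " GiB\n",
         huge_base / TiB, huge_rand / GiB);
  return 0;
}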
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -883,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -936,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page+1)) * pages); - if (estimate > 2*max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); + if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -947,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -956,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p==NULL || size==0) return; + if (p == NULL || size == 0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -972,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum,&numa_node); + GetNumaProcessorNodeEx(&pnum, &numa_node); return numa_node; } @@ -999,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for(node = 0; node < 256; node++) { + for (node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; + if (access(buf, R_OK) != 0) break; } - return (node+1); + return (node + 1); } #else static size_t mi_os_numa_nodex(void) { @@ -1031,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } From 211f1aa5190f063ee8eef237473281535c2be79f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 14:55:12 -0800 Subject: [PATCH 047/179] remove reset delay slots; add reset tracking per page and segment --- include/mimalloc-internal.h | 8 +- include/mimalloc-types.h | 28 +--- include/mimalloc.h | 3 +- src/arena.c | 8 +- src/bitmap.inc.c | 54 ++++++-- src/init.c | 11 +- src/memory.c | 199 +++++++++++++++------------ src/options.c | 5 +- src/os.c | 204 ++++++++++++++-------------- src/page.c | 7 +- src/segment.c | 264 ++++++++++++++++++++++-------------- 11 files changed, 443 insertions(+), 348 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d727e563..ab295e65 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -59,7 +59,7 @@ size_t _mi_os_good_alloc_size(size_t size); // memory.c void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld); +void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); @@ -75,7 +75,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; @@ -297,7 +297,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + const size_t bsize = page->block_size; + mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); + return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } // Get the page containing the pointer diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0ce91339..e816c3a6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -384,31 +384,12 @@ void 
_mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - -// ------------------------------------------------------ -// Delay slots (to avoid expensive OS calls) -// ------------------------------------------------------ -typedef int64_t mi_msecs_t; - -#define MI_RESET_DELAY_SLOTS (256) - -typedef struct mi_delay_slot_s { - mi_msecs_t expire; - uint8_t* addr; - size_t size; -} mi_delay_slot_t; - -typedef struct mi_delay_slots_s { - size_t capacity; // always `MI_RESET_DELAY_SLOTS` - size_t count; // current slots used (`<= capacity`) - mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS]; -} mi_delay_slots_t; - - // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ +typedef int64_t mi_msecs_t; + // Queue of segments typedef struct mi_segment_queue_s { mi_segment_t* first; @@ -417,9 +398,8 @@ typedef struct mi_segment_queue_s { // OS thread local data typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_delay_slots_t* reset_delay; // delay slots for OS reset operations - mi_stats_t* stats; // points to tld stats + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats } mi_os_tld_t; // Segments thread local data diff --git a/include/mimalloc.h b/include/mimalloc.h index a59b9cf7..197b1734 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -272,8 +272,9 @@ typedef enum mi_option_e { mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_eager_commit_delay, mi_option_reset_decommits, + mi_option_eager_commit_delay, + mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, diff --git a/src/arena.c b/src/arena.c index 46741208..4a596b2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -107,7 +107,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around - if (mi_bitmap_try_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; } @@ -137,9 +137,9 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } else if (commit) { // ensure commit now - bool any_zero; - mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); if (commit_zero) *is_zero = true; diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 81f87a79..11ada472 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -104,9 +104,29 @@ static inline size_t mi_bsr(uintptr_t x) { Claim a bit sequence atomically ----------------------------------------------------------- */ +// Try to atomically claim a sequence of `count` bits at in `idx` +// in the bitmap field. 
Returns `true` on success. +static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); + + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if ((field & mask) == 0) { // free? + if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { + // claimed! + return true; + } + } + return false; +} + + // Try to atomically claim a sequence of `count` bits in a single // field at `idx` in `bitmap`. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); volatile _Atomic(uintptr_t)* field = &bitmap[idx]; @@ -160,9 +180,9 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, con // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { for (size_t idx = 0; idx < bitmap_fields; idx++) { - if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) { + if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } @@ -170,39 +190,51 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, } // Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously +// Returns `true` if all `count` bits were 1 previously. static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_assert_internal((bitmap[idx] & mask) == mask); + // mi_assert_internal((bitmap[idx] & mask) == mask); uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); return ((prev & mask) == mask); } // Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
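Aside on the claim/unclaim primitives in this hunk: they operate on one machine word, setting a contiguous run of bits with an atomic OR (claim succeeds only if the run was previously all zero) and clearing it with an atomic AND. A compact sketch in plain C11 atomics (editorial illustration; the mask construction assumes the usual "`count` one-bits shifted to `bitidx`" that `mi_bitmap_mask_` presumably produces):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// `count` one-bits starting at `bitidx`
static uint64_t mask_of(size_t count, size_t bitidx) {
  uint64_t ones = (count >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << count) - 1));
  return (ones << bitidx);
}

// set `count` bits at `bitidx`; true iff they were all 0 before (cf. mi_bitmap_claim)
static bool claim(_Atomic uint64_t* field, size_t count, size_t bitidx) {
  uint64_t mask = mask_of(count, bitidx);
  uint64_t prev = atomic_fetch_or(field, mask);
  return ((prev & mask) == 0);
}

// clear `count` bits at `bitidx`; true iff they were all 1 before (cf. mi_bitmap_unclaim)
static bool unclaim(_Atomic uint64_t* field, size_t count, size_t bitidx) {
  uint64_t mask = mask_of(count, bitidx);
  uint64_t prev = atomic_fetch_and(field, ~mask);
  return ((prev & mask) == mask);
}

int main(void) {
  _Atomic uint64_t field = 0;
  printf("%d\n", claim(&field, 4, 8));     // 1: bits 8..11 were free
  printf("%d\n", claim(&field, 4, 8));     // 0: already set
  printf("%d\n", unclaim(&field, 4, 8));   // 1: all four were set
  return 0;
}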
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); if (any_zero != NULL) *any_zero = ((prev & mask) != mask); return ((prev & mask) == 0); } -// Returns `true` if all `count` bits were 1 -static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. +static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == 0); - return ((mi_atomic_read(&bitmap[idx]) & mask) == mask); + mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); } +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + #endif diff --git a/src/init.c b/src/init.c index f9735462..468fd46f 100644 --- a/src/init.c +++ b/src/init.c @@ -97,13 +97,11 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) -static mi_delay_slots_t tld_reset_delay_main = { MI_RESET_DELAY_SLOTS, 0, { {0,NULL,0} } }; - static mi_tld_t tld_main = { 0, false, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments - { 0, &tld_reset_delay_main, tld_main_stats }, // os + { 0, tld_main_stats }, // os { MI_STATS_NULL } // stats }; @@ -194,8 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; - mi_delay_slots_t reset_delay; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -215,7 +212,6 @@ static bool _mi_heap_init(void) { } mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; - mi_delay_slots_t* reset_delay = &td->reset_delay; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); heap->random = _mi_random_init(heap->thread_id); @@ -226,9 +222,6 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = 
&tld->stats; - tld->os.reset_delay = reset_delay; - memset(reset_delay, 0, sizeof(*reset_delay)); - reset_delay->capacity = MI_RESET_DELAY_SLOTS; _mi_heap_set_default_direct(heap); } return false; diff --git a/src/memory.c b/src/memory.c index b0bcf7a0..94b6348f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -54,6 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); + // Constants #if (MI_INTPTR_SIZE==8) #define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map @@ -73,28 +74,26 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Region info is a pointer to the memory region and two bits for // its flags: is_large, and is_committed. -typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - -static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} +typedef union mi_region_info_u { + uintptr_t value; + struct { + bool valid; + bool is_large; + int numa_node; + }; +} mi_region_info_t; // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(mi_region_info_t) info; // start of the memory area (and flags) - volatile _Atomic(uintptr_t) numa_node; // associated numa node + 1 (so 0 is no association) + volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association) + volatile _Atomic(void*) start; // start of the memory area (and flags) mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) - size_t arena_memid; // if allocated from a (huge page) arena + mi_bitmap_field_t reset; // track reset per block + volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- } mem_region_t; // The region map @@ -113,24 +112,32 @@ static size_t mi_region_block_count(size_t size) { return _mi_divide_up(size, MI_SEGMENT_SIZE); } +/* // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. static size_t mi_good_commit_size(size_t size) { if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; return _mi_align_up(size, _mi_os_large_page_size()); } +*/ // Return if a pointer points into a region reserved by us. 
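Aside on the region map declared in the memory.c hunk above: each region covers 256MiB and is tracked with one bit per 4MiB block, so a region's `in_use`/`dirty`/`commit`/`reset` state each fit in a single 64-bit bitmap field, and the 256GiB maximum heap area needs 1024 region descriptors. A small arithmetic sketch (editorial, 64-bit case):

#include <stdio.h>

int main(void) {
  const unsigned long long MiB = 1ULL << 20, GiB = 1ULL << 30;
  const unsigned long long region_size = 256 * MiB;  // MI_REGION_SIZE
  const unsigned long long block_size  = 4 * MiB;    // MI_SEGMENT_SIZE, one bitmap bit
  const unsigned long long heap_max    = 256 * GiB;  // MI_HEAP_REGION_MAX_SIZE (64-bit)
  printf("blocks per region : %llu\n", region_size / block_size);  // 64 -> one 64-bit bitmap field
  printf("regions in the map: %llu\n", heap_max / region_size);    // 1024 descriptors
  return 0;
}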
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_read_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); + uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; } +static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(start != NULL); + return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE)); +} + static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); size_t idx = region - regions; @@ -142,13 +149,10 @@ static size_t mi_memid_create_from_arena(size_t arena_memid) { return (arena_memid << 1) | 1; } -static bool mi_memid_is_arena(size_t id) { - return ((id&1)==1); -} -static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if (mi_memid_is_arena(id)) { - *arena_memid = (id>>1); +static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { + if ((id&1)==1) { + if (arena_memid != NULL) *arena_memid = (id>>1); return true; } else { @@ -159,6 +163,7 @@ static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t } } + /* ---------------------------------------------------------------------------- Allocate a region is allocated from the OS (or an arena) -----------------------------------------------------------------------------*/ @@ -187,16 +192,21 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = ®ions[idx]; - r->numa_node = _mi_os_numa_node(tld) + 1; - r->arena_memid = arena_memid; + r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_write_ptr(&r->start, start); // and share it - mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others + mi_region_info_t info; + info.valid = true; + info.is_large = region_large; + info.numa_node = _mi_os_numa_node(tld); + mi_atomic_write(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -207,36 +217,33 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? 
- mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - if (info==0) return false; + mi_region_info_t info; + info.value = mi_atomic_read_relaxed(®ion->info); + if (info.value==0) return false; // numa correct if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = ((int)mi_atomic_read_relaxed(®ion->numa_node)) - 1; + int rnode = info.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large - bool is_large; - bool is_committed; - mi_region_info_read(info, &is_large, &is_committed); - if (!allow_large && is_large) return false; + if (!allow_large && info.is_large) return false; return true; } -static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) +static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { - // try all regions for a free slot - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); + // try all regions for a free slot const size_t count = mi_atomic_read(®ions_count); size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; if (mi_region_is_suitable(r, numa_node, allow_large)) { - if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { + if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; return true; @@ -252,8 +259,9 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); mem_region_t* region; mi_bitmap_index_t bit_idx; - // first try to claim in existing regions - if (!mi_region_try_claim(blocks, *is_large, ®ion, &bit_idx, tld)) { + const int numa_node = (_mi_os_numa_node_count() <= 1 ? 
-1 : _mi_os_numa_node(tld)); + // try to claim in existing regions + if (!mi_region_try_claim(numa_node, blocks, *is_large, ®ion, &bit_idx, tld)) { // otherwise try to allocate a fresh region if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory @@ -261,30 +269,28 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo } } + // found a region and claimed `blocks` at `bit_idx` mi_assert_internal(region != NULL); mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info, ®ion_is_large, ®ion_is_committed); - mi_assert_internal(!(region_is_large && !*is_large)); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + void* start = mi_atomic_read_ptr(®ion->start); + mi_assert_internal(!(info.is_large && !*is_large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *is_large = region_is_large; + *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); + *is_large = info.is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - if (region_is_committed) { - // always committed - *commit = true; - } - else if (*commit) { + + // commit + if (*commit) { // ensure commit - bool any_zero; - mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_zero); - if (any_zero) { + bool any_uncommitted; + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); + if (any_uncommitted) { bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -294,6 +300,21 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } + mi_assert_internal(mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); + + // unreset reset blocks + if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); + bool reset_zero; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } + mi_assert_internal(!mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); + + #if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif // and return the allocation mi_assert_internal(p != NULL); @@ -325,7 +346,9 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); if (p != NULL) { + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } + #endif return p; } _mi_warning_message("unable to allocate from region: size %zu\n", size); @@ -346,56 +369,56 @@ Free -----------------------------------------------------------------------------*/ // Free previously allocated memory with a given id. 
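Aside on the memory id that `_mi_mem_free` (below) decodes: the low bit is a tag, with arena allocations stored as `(arena_memid << 1) | 1` and region allocations using an even id that packs the region and block index. A simplified reconstruction of that encode/decode (editorial; the exact region index packing is elided in the hunk, so `memid_from_region` is hypothetical):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static size_t memid_from_arena(size_t arena_memid)  { return (arena_memid << 1) | 1; }
static size_t memid_from_region(size_t region_bits) { return (region_bits << 1); }     // hypothetical packing

// mirrors mi_memid_is_arena: odd ids are arena allocations, even ids are region allocations
static bool memid_is_arena(size_t id, size_t* arena_memid, size_t* region_bits) {
  if ((id & 1) == 1) { *arena_memid = (id >> 1); return true; }
  *region_bits = (id >> 1);
  return false;
}

int main(void) {
  size_t a = 0, r = 0;
  printf("%d\n", memid_is_arena(memid_from_arena(42), &a, &r));   // 1, a == 42
  printf("%d\n", memid_is_arena(memid_from_region(7), &a, &r));   // 0, r == 7
  return 0;
}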
-void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) { +void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { mi_assert_internal(size > 0 && tld != NULL); if (p==NULL) return; if (size==0) return; - + size = _mi_align_up(size, _mi_os_page_size()); + size_t arena_memid = 0; mi_bitmap_index_t bit_idx; mem_region_t* region; - if (mi_memid_indices(id,®ion,&bit_idx,&arena_memid)) { + if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { // was a direct arena allocation, pass through _mi_arena_free(p, size, arena_memid, tld->stats); } else { // allocated in a region mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); const size_t blocks = mi_region_block_count(size); - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info, &is_large, &is_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE); + mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); + mi_region_info_t info; + info.value = mi_atomic_read(®ion->info); + mi_assert_internal(info.value != 0); + void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area? mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. - // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large && - mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use `option_page_reset` instead - { - // note: don't use `_mi_mem_reset` as it is shared with other threads! - _mi_os_reset(p, size, tld->stats); // TODO: maintain reset bits to unreset - } - if (!is_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&tld->stats->committed, mi_good_commit_size(size)); + // committed? + if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { + mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); } - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? + if (any_reset) { + // set the is_reset bits if any pages were reset + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); + } + + // reset the blocks to reduce the working set. 
+ if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && + mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + { + bool any_unreset; + mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); + if (any_unreset) { + _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); + } + } // and unclaim - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); + mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); } } @@ -416,13 +439,14 @@ void _mi_mem_collect(mi_os_tld_t* tld) { } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ions[i].info), NULL, &is_eager_committed); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); + void* start = mi_atomic_read_ptr(®ions[i].start); + size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); + memset(®ions[i], 0, sizeof(mem_region_t)); + // and release the whole region + mi_atomic_write(®ion->info, 0); + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); } - // and release - mi_atomic_write(®ion->info,0); } } } @@ -432,6 +456,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) { /* ---------------------------------------------------------------------------- Other -----------------------------------------------------------------------------*/ + bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { return _mi_os_reset(p, size, tld->stats); } diff --git a/src/options.c b/src/options.c index 8c4c1707..9b6e4cd0 100644 --- a/src/options.c +++ b/src/options.c @@ -65,10 +65,11 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset uses decommit/commit + { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/os.c b/src/os.c index 02683a02..553d72c9 100644 --- a/src/os.c +++ b/src/os.c @@ -77,11 +77,11 @@ static bool use_large_os_page(size_t size, size_t alignment) { // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512 * KiB) align_size = _mi_os_page_size(); - else if (size < 2 * MiB) align_size = 64 * KiB; - else if (size < 8 * MiB) align_size = 256 * KiB; - else if (size < 32 * MiB) align_size = 1 * MiB; - else align_size = 4 * MiB; + if (size < 512*KiB) align_size = _mi_os_page_size(); + else if (size < 2*MiB) align_size = 64*KiB; + else if (size < 8*MiB) align_size = 256*KiB; + else if (size < 32*MiB) align_size = 1*MiB; + else align_size = 4*MiB; if (size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -92,8 +92,8 @@ size_t _mi_os_good_alloc_size(size_t size) { // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. #include -typedef PVOID(__stdcall* PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS(__stdcall* PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; @@ -129,7 +129,7 @@ static bool mi_win_enable_large_os_pages() if (err == 0) err = GetLastError(); _mi_warning_message("cannot enable large OS page support, error %lu\n", err); } - return (ok != 0); + return (ok!=0); } void _mi_os_init(void) { @@ -144,7 +144,7 @@ void _mi_os_init(void) { if (hDll != NULL) { // use VirtualAlloc2FromApp if possible as it is available to Windows store apps pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2 == NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } hDll = LoadLibrary(TEXT("ntdll.dll")); @@ -170,7 +170,7 @@ void _mi_os_init() { os_alloc_granularity = os_page_size; } if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = 2 * MiB; + large_os_page_size = 2*MiB; } } #endif @@ -210,7 +210,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { + if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { return VirtualAlloc(hint, size, flags, PAGE_READWRITE); } #endif @@ -233,7 +233,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; 
if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0) { + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. @@ -247,12 +247,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } if (p == NULL) { - *is_large = ((flags & MEM_LARGE_PAGES) != 0); + *is_large = ((flags&MEM_LARGE_PAGES) != 0); p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { @@ -264,8 +264,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, #elif defined(__wasi__) static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t)try_alignment); - size_t alloc_size = _mi_align_up(aligned_base - base + size, _mi_os_page_size()); + uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); + size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); if (alloc_size < size) return NULL; if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { @@ -278,50 +278,50 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { void* p = NULL; -#if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; // fall back to regular mmap + p = mmap(hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap } -#else + #else UNUSED(try_alignment); -#endif - if (p == NULL) { - p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p == MAP_FAILED) p = NULL; + #endif + if (p==NULL) { + p = mmap(addr,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; } return p; } static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { void* p = NULL; -#if !defined(MAP_ANONYMOUS) -#define MAP_ANONYMOUS MAP_ANON -#endif -#if !defined(MAP_NORESERVE) -#define MAP_NORESERVE 0 -#endif + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; -#if defined(MAP_ALIGNED) // BSD + #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { size_t n = _mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= 
MAP_ALIGNED(n); } } -#endif -#if defined(PROT_MAX) + #endif + #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD -#endif -#if defined(VM_MAKE_TAG) -// macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) int os_tag = (int)mi_option_get(mi_option_os_tag); if (os_tag < 100 || os_tag > 255) os_tag = 100; fd = VM_MAKE_TAG(os_tag); -#endif + #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); @@ -335,39 +335,39 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro else { int lflags = flags; int lfd = fd; -#ifdef MAP_ALIGNED_SUPER + #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; -#endif -#ifdef MAP_HUGETLB + #endif + #ifdef MAP_HUGETLB lflags |= MAP_HUGETLB; -#endif -#ifdef MAP_HUGE_1GB + #endif + #ifdef MAP_HUGE_1GB static bool mi_huge_pages_available = true; if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else -#endif + #endif { -#ifdef MAP_HUGE_2MB + #ifdef MAP_HUGE_2MB lflags |= MAP_HUGE_2MB; -#endif + #endif } -#ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; -#endif + #endif if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); -#ifdef MAP_HUGE_1GB + #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); } -#endif + #endif if (large_only) return p; if (p == NULL) { mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations @@ -378,7 +378,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (p == NULL) { *is_large = false; p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); -#if defined(MADV_HUGEPAGE) + #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead // transparent huge pages (THP). 
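The `large_page_try_ok` counter in the Windows and Unix allocation paths above is a simple back-off: when a large-page attempt fails, the next N requests skip that expensive path and merely decrement the counter. A self-contained sketch of the pattern with C11 atomics; `try_large_alloc` and `normal_alloc` are placeholder helpers for the example, not mimalloc functions:

#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(size_t) large_try_ok;   // >0 means a recent large-page attempt failed

static void* try_large_alloc(size_t size) { (void)size; return NULL; }  // pretend large pages always fail
static void* normal_alloc(size_t size)    { return malloc(size); }      // regular fallback path

static void* alloc_with_backoff(size_t size) {
  size_t ok = atomic_load_explicit(&large_try_ok, memory_order_relaxed);
  if (ok > 0) {
    // back-off window is open: skip the expensive attempt and shrink the window
    atomic_compare_exchange_weak(&large_try_ok, &ok, ok - 1);
  }
  else {
    void* p = try_large_alloc(size);
    if (p != NULL) return p;
    atomic_store(&large_try_ok, 10);   // failed: skip large pages for the next 10 calls
  }
  return normal_alloc(size);
}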
It is not required to call `madvise` with MADV_HUGE // though since properly aligned allocations will already use large pages if available @@ -390,7 +390,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro *is_large = true; // possibly }; } -#endif + #endif } return p; } @@ -404,18 +404,18 @@ static volatile _Atomic(intptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size % MI_SEGMENT_SIZE) != 0) return NULL; + if ((size%MI_SEGMENT_SIZE) != 0) return NULL; intptr_t hint = mi_atomic_add(&aligned_base, size); - if (hint == 0 || hint > ((intptr_t)30 << 40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) + if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r >> 17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB -#endif + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } - if (hint % try_alignment != 0) return NULL; + if (hint%try_alignment != 0) return NULL; return (void*)hint; } #else @@ -444,17 +444,17 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ -#if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); -#elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); -#else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); -#endif + #if defined(_WIN32) + int flags = MEM_RESERVE; + if (commit) flags |= MEM_COMMIT; + p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + #elif defined(__wasi__) + *is_large = false; + p = mi_wasm_heap_grow(size, try_alignment); + #else + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -564,7 +564,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large != NULL ? 
large : &allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); } @@ -616,7 +616,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, csize); } -#if defined(_WIN32) + #if defined(_WIN32) if (commit) { // if the memory was already committed, the call succeeds but it is not zero'd // *is_zero = true; @@ -627,9 +627,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); err = (ok ? 0 : GetLastError()); } -#elif defined(__wasi__) + #elif defined(__wasi__) // WebAssembly guests can't control memory protection -#elif defined(MAP_FIXED) + #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); @@ -640,10 +640,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } } -#else + #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } -#endif + #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); } @@ -674,24 +674,24 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); + else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! -#if (MI_DEBUG>1) - if (MI_SECURE == 0) { + #if (MI_DEBUG>1) + if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } -#endif + #endif #if defined(_WIN32) // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); -#if 1 + #if 1 if (p == start && start != NULL) { - VirtualUnlock(start, csize); // VirtualUnlock after MEM_RESET removes the memory from the working set + VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set } -#endif + #endif if (p != start) return false; #else #if defined(MADV_FREE) @@ -748,7 +748,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (csize == 0) return false; /* if (_mi_os_is_huge_reserved(addr)) { - _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); + _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ int err = 0; @@ -780,7 +780,7 @@ bool _mi_os_unprotect(void* addr, size_t size) { bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { // page align conservatively within the range - mi_assert_internal(oldsize > newsize&& p != NULL); + mi_assert_internal(oldsize > newsize && p != NULL); if (oldsize < newsize || p == NULL) return false; if (oldsize == newsize) return true; @@ -808,20 +808,20 @@ and possibly associated with a specific NUMA node. 
(use `numa_node>=0`) #if defined(WIN32) && (MI_INTPTR_SIZE >= 8) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); mi_assert_internal(addr != NULL); const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; mi_win_enable_large_os_pages(); -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) + #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { -#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE -#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) -#endif + #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE + #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) + #endif params[0].Type = 5; // == MemExtendedParameterAttributeFlags; params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; @@ -848,7 +848,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } -#endif + #endif // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -869,16 +869,16 @@ static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, cons } #endif static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size % GiB == 0); + mi_assert_internal(size%GiB == 0); bool is_large = true; void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8 * MI_INTPTR_SIZE) { // at most 64 nodes + if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes uintptr_t numa_mask = (1UL << numa_node); // TODO: does `mbind` work correctly for huge OS pages? should we // use `set_mempolicy` before calling mmap instead? 
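For reference, the Linux huge-page path here builds a one-machine-word node mask (`1UL << numa_node`), which is why the node index is capped at `8*MI_INTPTR_SIZE`, and passes it to `mbind` with `MPOL_PREFERRED`. A Linux-only sketch of that call through the raw syscall; the `bind_to_node` helper and the fallback `MPOL_PREFERRED` definition are illustrative, not part of mimalloc:

#include <stddef.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef MPOL_PREFERRED
#define MPOL_PREFERRED 1   // value from <linux/mempolicy.h>
#endif

// Ask the kernel to prefer `numa_node` for the pages backing [p, p+len).
// A single word serves as the node mask, so only the first 8*sizeof(uintptr_t)
// nodes can be addressed, the same limit as the check in the code above.
static long bind_to_node(void* p, size_t len, int numa_node) {
  if (numa_node < 0 || numa_node >= (int)(8*sizeof(uintptr_t))) return 0;  // out of range: skip binding
  unsigned long mask = 1UL << numa_node;
  return syscall(SYS_mbind, p, len, MPOL_PREFERRED, &mask, 8*sizeof(uintptr_t), 0);
}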
// see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8 * MI_INTPTR_SIZE, 0); + long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); if (err != 0) { _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno)); } @@ -910,7 +910,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r >> 17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB #endif } end = start + size; @@ -963,8 +963,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (max_msecs > 0) { mi_msecs_t elapsed = _mi_clock_end(start_t); if (page >= 1) { - mi_msecs_t estimate = ((elapsed / (page + 1)) * pages); - if (estimate > 2 * max_msecs) { // seems like we are going to timeout, break + mi_msecs_t estimate = ((elapsed / (page+1)) * pages); + if (estimate > 2*max_msecs) { // seems like we are going to timeout, break elapsed = max_msecs + 1; } } @@ -974,7 +974,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } } - mi_assert_internal(page * MI_HUGE_OS_PAGE_SIZE <= size); + mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) *pages_reserved = page; if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; return (page == 0 ? NULL : start); @@ -983,7 +983,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { - if (p == NULL || size == 0) return; + if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); @@ -999,7 +999,7 @@ static size_t mi_os_numa_nodex() { PROCESSOR_NUMBER pnum; USHORT numa_node = 0; GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum, &numa_node); + GetNumaProcessorNodeEx(&pnum,&numa_node); return numa_node; } @@ -1026,12 +1026,12 @@ static size_t mi_os_numa_nodex(void) { static size_t mi_os_numa_node_countx(void) { char buf[128]; unsigned node = 0; - for (node = 0; node < 256; node++) { + for(node = 0; node < 256; node++) { // enumerate node entries -- todo: it there a more efficient way to do this? 
(but ensure there is no allocation) snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf, R_OK) != 0) break; + if (access(buf,R_OK) != 0) break; } - return (node + 1); + return (node+1); } #else static size_t mi_os_numa_nodex(void) { @@ -1058,7 +1058,7 @@ size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_get(mi_os_tld_t* tld) { UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); - if (numa_count <= 1) return 0; // optimize on single numa node systems: always node 0 + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 size_t numa_node = mi_os_numa_nodex(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } diff --git a/src/page.c b/src/page.c index 9085ccb5..df6ecc71 100644 --- a/src/page.c +++ b/src/page.c @@ -75,7 +75,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -229,6 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +343,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -597,7 +598,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); + _mi_segment_page_start(segment, page, block_size, &page_size, NULL); page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); diff --git a/src/segment.c b/src/segment.c index 549dd339..ffba8c0d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -13,6 +13,8 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_PAGE_HUGE_ALIGN (256*1024) +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); + /* ----------------------------------------------------------- Segment allocation We allocate pages inside big OS allocated "segments" @@ -40,7 +42,6 @@ terms of the MIT license. 
A copy of the license can be found in the file Queue of segments containing free pages ----------------------------------------------------------- */ - #if (MI_DEBUG>=3) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_internal(segment != NULL); @@ -143,31 +144,50 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif + +/* ----------------------------------------------------------- + Page reset +----------------------------------------------------------- */ + +static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + size_t psize; + void* start = mi_segment_raw_page_start(segment, page, &psize); + page->is_reset = true; + mi_assert_internal(size <= psize); + _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); +} + +static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) +{ + mi_assert_internal(page->is_reset); + mi_assert_internal(!segment->mem_is_fixed); + page->is_reset = false; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + _mi_mem_unreset(start, ((size == 0 || size > psize) ? psize : size), &is_zero, tld->os); + if (is_zero) page->is_zero_init = true; +} + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) -{ +// Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +// The raw start is not taking aligned block allocation into consideration. +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { size_t psize = (segment->page_kind == MI_PAGE_HUGE ? 
segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; + uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; + p += segment->segment_info_size; psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } } - + if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page @@ -175,19 +195,36 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(_mi_ptr_page(p) == page); + mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. - capacity = MI_SMALL_PAGES_PER_SEGMENT; +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +{ + size_t psize; + uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); + if (pre_size != NULL) *pre_size = 0; + if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - */ + + if (page_size != NULL) *page_size = psize; + mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +{ const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; size_t isize = 0; @@ -234,7 +271,15 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->os); + + bool fully_committed = true; + bool any_reset = false; + for (size_t i = 0; i < segment->capacity; i++) { + const mi_page_t* page = &segment->pages[i]; + if (!page->is_committed) fully_committed = false; + if (page->is_reset) any_reset = true; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -275,7 +320,7 @@ static bool 
mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -328,31 +373,31 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (page_kind >= MI_PAGE_LARGE); - bool protection_still_good = false; + bool pages_still_good = false; bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - if (segment->page_kind != page_kind) { + if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { + pages_still_good = true; + } + else + { + // different page kinds; unreset any reset pages, and unprotect + // TODO: optimize cache pop to return fitting pages if possible? + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } + } + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + // TODO: should we unprotect per page? (with is_protected flag?) 
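To summarize the cache-reuse decision above in isolation: a segment popped from the thread-local cache is reused as-is only when its page kind and size match the request; otherwise its reset pages are brought back and, in secure mode, its guard pages are removed before it is reinitialized. A reduced sketch with stand-in types, not the real mimalloc structures:

#include <stddef.h>

typedef enum { PAGE_SMALL, PAGE_MEDIUM, PAGE_LARGE, PAGE_HUGE } page_kind_t;
typedef struct page_s    { int is_reset; } page_t;
typedef struct segment_s {
  page_kind_t page_kind;
  size_t      segment_size;
  size_t      capacity;
  page_t      pages[64];
} segment_t;

static void page_unreset(page_t* p)         { p->is_reset = 0; /* would call _mi_mem_unreset */ }
static void segment_unprotect(segment_t* s) { (void)s;         /* would remove guard pages */ }

// returns 1 when the cached segment's pages can be reused without reinitialization
static int prepare_cached_segment(segment_t* s, page_kind_t kind, size_t size, int secure) {
  if (kind <= PAGE_MEDIUM && s->page_kind == kind && s->segment_size == size) {
    return 1;  // same layout: pages (and guard pages) are still good
  }
  if (secure) segment_unprotect(s);           // guard pages were placed for a different layout
  for (size_t i = 0; i < s->capacity; i++) {  // make every page plainly accessible again
    if (s->pages[i].is_reset) page_unreset(&s->pages[i]);
  }
  return 0;  // caller must re-run the page initialization
}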
_mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } - } - if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { - mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->os); - segment->mem_is_committed = true; - } - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_page_reset)) { - bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->os); - if (reset_zero) is_zero = true; - } + } } else { // Allocate the segment from the OS @@ -373,27 +418,42 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t,next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); - - // guard pages - if ((MI_SECURE != 0) && !protection_still_good) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); - size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); + if (!pages_still_good) { + // guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info + // and the page data + mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); + _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); + const size_t os_page_size = _mi_os_page_size(); + if (MI_SECURE <= 1) { + // and protect the last page too + _mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); + } + else { + // protect every page + for (size_t i = 0; i < capacity; i++) { + _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); + } } } + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); + + // initialize pages info + for (uint8_t i = 0; i < capacity; i++) { + segment->pages[i].segment_idx = i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; + } + } + else { + // zero the segment info but not the pages info (and mem fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); } // initialize @@ -404,13 +464,8 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; - } _mi_stat_increase(&tld->stats->page_committed, 
segment->segment_info_size); + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -463,24 +518,22 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { + // set in-use before doing unreset to prevent delayed reset + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_committed = true; - bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; - } - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_reset = false; - bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, tld->os); - if (is_zero) page->is_zero_init = true; - } + uint8_t* start = _mi_page_start(segment, page, &psize); + page->is_committed = true; + bool is_zero = false; + _mi_mem_commit(start,psize,&is_zero,tld->os); + if (is_zero) page->is_zero_init = true; } + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } return page; } } @@ -503,22 +556,21 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) - // && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets - { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; - _mi_mem_reset(start, psize, tld->os); - } + // calculate the used size from the raw (non-aligned) start of the page + size_t pre_size; + _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields page->is_zero_init = false; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; segment->used--; + + // reset the page memory to reduce memory pressure? 
+ // note: must come after setting `segment_in_use` to false + mi_page_reset(segment, page, used_size, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -568,7 +620,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -628,6 +680,8 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -636,7 +690,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -666,8 +720,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - page->segment_in_use = true; - segment->used++; + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -685,7 +738,11 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); + mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); +#if MI_DEBUG>=2 + _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; +#endif + return page; } static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -706,6 +763,9 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; +#if MI_DEBUG>=2 + _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; +#endif return page; } @@ -717,7 +777,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + page->segment_in_use = true; return page; } From 049dbf41bacbf8a839551cd3e7710ffa1925b770 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 15:44:07 -0800 Subject: [PATCH 048/179] fix commit bits for huge page allocations --- src/memory.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memory.c b/src/memory.c index 94b6348f..214bf0d3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -181,6 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, 
®ion_large, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); + mi_assert_internal(!region_large || region_commit); // claim a fresh slot const uintptr_t idx = mi_atomic_increment(®ions_count); @@ -194,8 +195,8 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t* r = ®ions[idx]; r->arena_memid = arena_memid; mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : ~0UL)); - mi_atomic_write(&r->commit, (region_commit ? ~0UL : 0)); + mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); @@ -291,6 +292,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo bool any_uncommitted; mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { + mi_assert_internal(!info.is_large); bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -304,6 +306,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); bool reset_zero; From 24b768363efa415f74ba25d53c6fdae55c1aa24c Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 22 Nov 2019 09:28:48 -0800 Subject: [PATCH 049/179] bump version to 1.3 for further development --- cmake/mimalloc-config-version.cmake | 2 +- ide/vs2019/mimalloc-override.vcxproj.filters | 6 ++++++ ide/vs2019/mimalloc.vcxproj.filters | 6 ++++++ include/mimalloc.h | 2 +- test/CMakeLists.txt | 2 +- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 9d78b5a0..f64948d3 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 2) +set(mi_version_minor 3) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index bc1e4c60..b2dea4e1 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -40,6 +40,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index b2282df3..0cce0c4f 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -43,6 +43,12 @@ Source Files + + Source Files + + + Source Files + diff --git a/include/mimalloc.h b/include/mimalloc.h index f727a990..2944de89 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 120 // major + 2 digits minor +#define MI_MALLOC_VERSION 130 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a80dde58..ed204888 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.3 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0d3c195f376f32ba7de5124d19294a765aaf68f3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 22 Nov 2019 11:28:55 -0800 Subject: [PATCH 050/179] update stress test with more documentation --- test/test-stress.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 6b2fb8c4..b549e1b4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -5,9 +5,14 @@ terms of the MIT license. -----------------------------------------------------------------------------*/ /* This is a stress test for the allocator, using multiple threads and - transferring objects between threads. This is not a typical workload - but uses a random linear size distribution. Timing can also depend on - (random) thread scheduling. Do not use this test as a benchmark! + transferring objects between threads. It tries to reflect real-world workloads: + - allocation size is distributed linearly in powers of two + - with some fraction extra large (and some extra extra large) + - the allocations are initialized and read again at free + - pointers transfer between threads + - threads are terminated and recreated with some objects surviving in between + - uses deterministic "randomness", but execution can still depend on + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -22,13 +27,13 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations re-creating all threads +static int ITER = 10; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of N uintptr_t? +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? 
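As a concrete illustration of the size distribution described in the comment above, the sketch below picks a power-of-two bin, a size within the bin, and occasionally a much larger object; the RNG and the exact bounds are assumptions for the example, not the stress test's actual parameters:

#include <stddef.h>
#include <stdint.h>

static uint64_t rng_state = 0x853c49e6748fea9bULL;

static uint64_t next_rand(void) {             // xorshift64*: small, deterministic RNG
  uint64_t x = rng_state;
  x ^= x >> 12; x ^= x << 25; x ^= x >> 27;
  rng_state = x;
  return x * 0x2545F4914F6CDD1DULL;
}

static size_t pick_size(void) {
  uint64_t r = next_rand();
  size_t shift = (size_t)(r % 12);            // bins from 8 bytes up to 16KiB
  size_t size  = (size_t)8 << shift;
  size += (size_t)(next_rand() % size);       // uniform within the chosen bin
  if ((r % 100) == 0) size *= 64;             // a small fraction of extra large objects
  return size;
}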
#ifdef USE_STD_MALLOC @@ -185,7 +190,7 @@ int main(int argc, char** argv) { long n = (strtol(argv[3], &end, 10)); if (n > 0) ITER = n; } - printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); @@ -204,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 4a0d35afd0714f3c8d37957d3a8b384d0591995d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 23 Nov 2019 11:59:19 -0800 Subject: [PATCH 051/179] improve secure guard page allocation to work with non-eager commit --- src/memory.c | 4 +- src/options.c | 2 +- src/segment.c | 101 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 37 deletions(-) diff --git a/src/memory.c b/src/memory.c index 214bf0d3..b29e18f3 100644 --- a/src/memory.c +++ b/src/memory.c @@ -302,14 +302,14 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // no need to commit, but check if already fully committed *commit = mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); } - mi_assert_internal(mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); + mi_assert_internal(!*commit || mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - bool reset_zero; + bool reset_zero = false; _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); if (reset_zero) *is_zero = true; } diff --git a/src/options.c b/src/options.c index 9b6e4cd0..8975a6d3 100644 --- a/src/options.c +++ b/src/options.c @@ -69,7 +69,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500,UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index ffba8c0d..0b6501d8 100644 --- a/src/segment.c +++ b/src/segment.c @@ -123,10 +123,18 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } #endif -#if (MI_DEBUG>=3) -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); +static size_t mi_segment_page_size(mi_segment_t* segment) { + if (segment->capacity > 1) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + return ((size_t)1 << segment->page_shift); + } + else { + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + return segment->segment_size; + } } + +#if (MI_DEBUG>=3) static bool mi_segment_is_valid(mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); @@ -139,11 +147,47 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + (mi_segment_page_size(segment) * segment->capacity == segment->segment_size)); return true; } #endif +/* ----------------------------------------------------------- + Guard pages +----------------------------------------------------------- */ + +static void mi_segment_protect_range(void* p, size_t size, bool protect) { + if (protect) { + _mi_mem_protect(p, size); + } + else { + _mi_mem_unprotect(p, size); + } +} + +static void mi_segment_protect(mi_segment_t* segment, bool protect) { + // add/remove guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info and the page data + const size_t os_page_size = _mi_os_page_size(); + mi_assert_internal((segment->segment_info_size - os_page_size) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); + mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_page_size == 0); + mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); + if (MI_SECURE <= 1 || segment->capacity == 1) { + // and protect the last (or only) page too + mi_segment_protect_range((uint8_t*)segment + segment->segment_size - os_page_size, os_page_size, protect); + } + else { + // or protect every page + const size_t page_size = mi_segment_page_size(segment); + for (size_t i = 0; i < segment->capacity; i++) { + if (segment->pages[i].is_committed) { + mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size, protect); + } + } + } + } +} /* ----------------------------------------------------------- Page reset @@ -269,15 +313,18 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + mi_segment_protect(segment, false); // ensure no more guard pages are set } bool fully_committed = true; bool any_reset = false; for (size_t i = 0; i < segment->capacity; i++) { - const mi_page_t* page = 
&segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) fully_committed = false; - if (page->is_reset) any_reset = true; + else if (page->is_reset) { + any_reset = true; + // mi_page_unreset(segment, page, 0, tld); + } } _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -394,8 +441,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); - // TODO: should we unprotect per page? (with is_protected flag?) - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs + mi_segment_protect(segment, false); // reset protection if the page kind differs } } } @@ -408,7 +454,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->os); + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -419,25 +465,6 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); if (!pages_still_good) { - // guard pages - if (MI_SECURE != 0) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal(info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect((uint8_t*)segment + info_size, (pre_size - info_size)); - const size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect((uint8_t*)segment + segment_size - os_page_size, os_page_size); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size); - } - } - } - // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -465,6 +492,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // set protection + mi_segment_protect(segment, true); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; @@ -525,11 +555,13 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); page->is_committed = true; bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,tld->os); - if (is_zero) page->is_zero_init = true; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start,psize + gsize,&is_zero,tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
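The secure-mode commit above grows the commit range by one OS page (`gsize`) and then protects that trailing page, so an overrun past the page data faults immediately. A POSIX-only sketch of the same idea, not mimalloc code; it assumes `start` was reserved earlier with an anonymous PROT_NONE mmap, is page aligned, and that `psize` is a multiple of the OS page size:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

static void* commit_with_guard(void* start, size_t psize) {
  size_t gsize = (size_t)sysconf(_SC_PAGESIZE);
  // make the page data plus one extra trailing page accessible
  if (mprotect(start, psize + gsize, PROT_READ | PROT_WRITE) != 0) return NULL;
  // then take the trailing page away again so overruns trap
  if (mprotect((uint8_t*)start + psize, gsize, PROT_NONE) != 0) return NULL;
  return start;
}

A caller would obtain `start` from something like mmap(NULL, psize + gsize, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) and only ever hand out the first `psize` bytes.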
@@ -759,7 +791,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; @@ -773,7 +805,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld { mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); + mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned mi_page_t* page = &segment->pages[0]; @@ -800,5 +832,6 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); return page; } From 41ef691292caa2417ef7e954f8eb9db2b18d1031 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Sun, 1 Sep 2019 01:06:01 -0700 Subject: [PATCH 052/179] avoid deadlock with BSD systems that call malloc from the dynamic linker extend the exception used for macOS to cover also OpenBSD (tested in 6.4+) and DragonFlyBSD (tested in 5.6.2) --- include/mimalloc-internal.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 452f0b68..2ddf3f16 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,7 +10,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__)) +#if defined(MI_MALLOC_OVERRIDE) && \ + (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif @@ -221,7 +222,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate static inline mi_heap_t* mi_get_default_heap(void) { #ifdef MI_TLS_RECURSE_GUARD - // on some platforms, like macOS, the dynamic loader calls `malloc` + // on some BSD platforms, like macOS, the dynamic loader calls `malloc` // to initialize thread local data. To avoid recursion, we need to avoid // accessing the thread local `_mi_default_heap` until our module is loaded // and use the statically allocated main heap until that time. 
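The recursion guard above boils down to: never touch thread-local state while the dynamic loader may still be initializing the library, and hand out a statically allocated heap instead. A simplified sketch of the pattern with stand-in types, not the real mimalloc heap:

#include <stdbool.h>
#include <stddef.h>

typedef struct heap_s { int dummy; } heap_t;

static heap_t                 main_heap;       // statically allocated, always safe to return
static bool                   process_loaded;  // set once library initialization has finished
static _Thread_local heap_t*  default_heap;    // thread-local; unsafe to touch while loading

static heap_t* get_default_heap(void) {
  if (!process_loaded) return &main_heap;      // guard: the loader may be calling malloc right now
  if (default_heap == NULL) default_heap = &main_heap;
  return default_heap;
}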
From 727d33b96f9d120d022a9de1bf8b0f39f7645c15 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 14:40:47 -0800 Subject: [PATCH 053/179] more precise memory reset --- src/memory.c | 16 ++++++++++------ src/segment.c | 50 +++++++++++++++++++++----------------------------- 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/memory.c b/src/memory.c index b29e18f3..9505c98f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -306,15 +306,18 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { + // some blocks are still reset mi_assert_internal(!info.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; + if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed + bool reset_zero = false; + _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); + if (reset_zero) *is_zero = true; + } } mi_assert_internal(!mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - + #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } #endif @@ -409,8 +412,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re } // reset the blocks to reduce the working set. - if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) && - mi_option_is_enabled(mi_option_eager_commit)) // cannot reset halfway committed segments, use only `option_page_reset` instead + if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) + && (mi_option_is_enabled(mi_option_eager_commit) || + mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); diff --git a/src/segment.c b/src/segment.c index 0b6501d8..887248b4 100644 --- a/src/segment.c +++ b/src/segment.c @@ -320,10 +320,10 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se bool any_reset = false; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->is_committed) fully_committed = false; - else if (page->is_reset) { + if (!page->is_committed) { fully_committed = false; } + if (page->is_reset) { any_reset = true; - // mi_page_unreset(segment, page, 0, tld); + if (mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false;} } } _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); @@ -419,7 +419,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind >= MI_PAGE_LARGE); + bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; @@ -431,18 +431,23 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } else { + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + mi_segment_protect(segment, false); // reset protection if the page kind differs + } // different page kinds; unreset any reset 
pages, and unprotect // TODO: optimize cache pop to return fitting pages if possible? for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { + page->is_reset = false; + } + else { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) + } } } - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // reset protection if the page kind differs - } } } else { @@ -491,7 +496,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true); @@ -512,18 +517,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); - } - } - */ - + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -602,7 +596,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // reset the page memory to reduce memory pressure? // note: must come after setting `segment_in_use` to false - mi_page_reset(segment, page, used_size, tld); + mi_page_reset(segment, page, 0 /*used_size*/, tld); } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -792,9 +786,8 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); #if MI_DEBUG>=2 _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; #endif @@ -806,10 +799,9 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); - segment->used = 1; segment->thread_id = 0; // huge pages are immediately abandoned - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); return page; } From 4452431b6c66250776200b24465a01e03a393d0a Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 15:25:19 -0800 Subject: [PATCH 054/179] reenable segment cache and fix initial segment commit --- src/segment.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index 887248b4..9aba8525 100644 --- a/src/segment.c +++ b/src/segment.c @@ -348,7 +348,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread + // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache @@ -424,7 +424,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = NULL; // mi_segment_cache_pop(segment_size, tld); + mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; @@ -448,6 +448,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } } } + // ensure the initial info is committed + if (segment->capacity < capacity) { + bool commit_zero = false; + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); + if (commit_zero) is_zero = true; + } } } else { From c6df7a199c384ed0394e0e57475e6e866172b544 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 24 Nov 2019 22:00:11 -0800 Subject: [PATCH 055/179] experimental eager page commit option --- include/mimalloc.h | 1 + src/options.c | 5 +++-- src/os.c | 2 +- src/page.c | 31 ++++++++++++++----------- src/segment.c | 56 ++++++++++++++++++++++++++++------------------ 5 files changed, 57 insertions(+), 38 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 2944de89..7da7cf62 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,6 +267,7 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, + mi_option_eager_page_commit, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, diff --git a/src/options.c b/src/options.c index 8975a6d3..bb6718be 100644 --- a/src/options.c +++ b/src/options.c @@ -56,18 +56,19 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled + { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled #ifdef _WIN32 // and BSD? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) #else { 1, UNINIT, MI_OPTION(eager_region_commit) }, #endif + { 1, UNINIT, MI_OPTION(eager_page_commit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. diff --git a/src/os.c b/src/os.c index 553d72c9..0197bafc 100644 --- a/src/os.c +++ b/src/os.c @@ -603,7 +603,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* // (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { // page align in the range, commit liberally, decommit conservative - *is_zero = false; + if (is_zero != NULL) { *is_zero = false; } size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) diff --git a/src/page.c b/src/page.c index 31c8fd5f..2992bf09 100644 --- a/src/page.c +++ b/src/page.c @@ -35,7 +35,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); } -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); #if (MI_DEBUG>=3) @@ -242,7 +242,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); if (page == NULL) return NULL; mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, &heap->tld->stats); + mi_page_init(heap, page, block_size, heap->tld); _mi_stat_increase( &heap->tld->stats.pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); @@ -544,8 +544,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) 
-static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); @@ -555,8 +554,8 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); - mi_stat_counter_increase(stats->pages_extended, 1); + uint8_t* page_start = _mi_page_start(_mi_page_segment(page), page, &page_size); + mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count size_t extend = page->reserved - page->capacity; @@ -572,16 +571,22 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend < (1UL<<16)); + // commit on-demand for large and huge pages? + if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + uint8_t* start = page_start + (page->capacity * page->block_size); + _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + } + // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, stats ); + mi_page_free_list_extend(page, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, stats); + mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(stats->page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * page->block_size); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -591,7 +596,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st } // Initialize a fresh page -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) { +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert(segment != NULL); @@ -621,7 +626,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,stats); + mi_page_extend_free(heap,page,tld); mi_assert(mi_page_immediate_available(page)); } @@ -666,7 +671,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 2. 
Try to extend if (page->capacity < page->reserved) { - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); break; } @@ -707,7 +712,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { if (page != NULL) { if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } else { diff --git a/src/segment.c b/src/segment.c index 9aba8525..13bcf56a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -200,7 +200,12 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m void* start = mi_segment_raw_page_start(segment, page, &psize); page->is_reset = true; mi_assert_internal(size <= psize); - _mi_mem_reset(start, ((size == 0 || size > psize) ? psize : size), tld->os); + size_t reset_size = (size == 0 || size > psize ? psize : size); + if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + mi_assert_internal(page->block_size > 0); + reset_size = page->capacity * page->block_size; + } + _mi_mem_reset(start, reset_size, tld->os); } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) @@ -210,8 +215,13 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, page->is_reset = false; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + size_t unreset_size = (size == 0 || size > psize ? psize : size); + if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + mi_assert_internal(page->block_size > 0); + unreset_size = page->capacity * page->block_size; + } bool is_zero = false; - _mi_mem_unreset(start, ((size == 0 || size > psize) ? psize : size), &is_zero, tld->os); + _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); if (is_zero) page->is_zero_init = true; } @@ -414,8 +424,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); - + // Initialize parameters bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); @@ -554,14 +563,16 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - page->is_committed = true; - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? 
_mi_os_page_size() : 0); - _mi_mem_commit(start,psize + gsize,&is_zero,tld->os); - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } + if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + page->is_committed = true; + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } + } } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? @@ -583,26 +594,27 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->is_committed); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); // calculate the used size from the raw (non-aligned) start of the page - size_t pre_size; - _mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); - size_t used_size = pre_size + (page->capacity * page->block_size); + //size_t pre_size; + //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + //size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields page->is_zero_init = false; - ptrdiff_t ofs = offsetof(mi_page_t,capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); page->segment_in_use = false; - segment->used--; // reset the page memory to reduce memory pressure? 
- // note: must come after setting `segment_in_use` to false + // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); + + // zero the page data, but not the segment fields + ptrdiff_t ofs = offsetof(mi_page_t,capacity); + memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + segment->used--; } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) @@ -713,7 +725,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); From 1643273357ac13fbe698306776d35a9d25afcb53 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 10:11:29 -0800 Subject: [PATCH 056/179] fix unix bug in decommit size --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 0197bafc..6cf89c99 100644 --- a/src/os.c +++ b/src/os.c @@ -632,7 +632,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #elif defined(MAP_FIXED) if (!commit) { // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) - void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); if (p != start) { err = errno; } } else { From 1d998af85432bc744275df7c9723821d947e796a Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 10:47:17 -0800 Subject: [PATCH 057/179] clean up options; make secure work with eager_page_commit --- include/mimalloc.h | 6 +++--- src/options.c | 14 +++++++------- src/segment.c | 36 +++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7da7cf62..94d9edfc 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -267,19 +267,19 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, - mi_option_eager_page_commit, + mi_option_reset_decommits, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, mi_option_segment_reset, - mi_option_reset_decommits, mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, - _mi_option_last + _mi_option_last, + mi_option_eager_page_commit = mi_option_eager_commit } mi_option_t; diff --git a/src/options.c b/src/options.c index bb6718be..c8df29a8 100644 --- a/src/options.c +++ b/src/options.c @@ -56,21 +56,21 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 0, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled - #ifdef _WIN32 // and BSD? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
+ { 0, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif - { 1, UNINIT, MI_OPTION(eager_page_commit) }, { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 13bcf56a..f6ce939b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -165,7 +165,7 @@ static void mi_segment_protect_range(void* p, size_t size, bool protect) { } } -static void mi_segment_protect(mi_segment_t* segment, bool protect) { +static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* tld) { // add/remove guard pages if (MI_SECURE != 0) { // in secure mode, we set up a protected page in between the segment info and the page data @@ -175,7 +175,13 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect) { mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); if (MI_SECURE <= 1 || segment->capacity == 1) { // and protect the last (or only) page too - mi_segment_protect_range((uint8_t*)segment + segment->segment_size - os_page_size, os_page_size, protect); + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + uint8_t* start = (uint8_t*)segment + segment->segment_size - os_page_size; + if (protect && !mi_option_is_enabled(mi_option_eager_page_commit)) { + // ensure secure page is committed + _mi_mem_commit(start, os_page_size, NULL, tld); + } + mi_segment_protect_range(start, os_page_size, protect); } else { // or protect every page @@ -323,19 +329,23 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // ensure no more guard pages are set + mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - bool fully_committed = true; bool any_reset = false; + bool fully_committed = true; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } - if (page->is_reset) { - any_reset = true; - if (mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false;} - } + if (page->is_reset) { any_reset = true; } } + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + 
fully_committed = false; + } + if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + fully_committed = false; + } + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -442,7 +452,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); - mi_segment_protect(segment, false); // reset protection if the page kind differs + mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs } // different page kinds; unreset any reset pages, and unprotect // TODO: optimize cache pop to return fitting pages if possible? @@ -514,7 +524,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection - mi_segment_protect(segment, true); + mi_segment_protect(segment, true, tld->os); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; @@ -563,8 +573,8 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); + page->is_committed = true; if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { - page->is_committed = true; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -594,7 +604,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); + mi_assert_internal(page->is_committed); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -725,7 +735,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { mi_assert_internal(!page->is_reset); - mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE || page->is_committed); + mi_assert_internal(page->is_committed); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); From a799a191360a060afc14ca686f5803bb26448e3b Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 25 Nov 2019 14:30:12 -0800 Subject: [PATCH 058/179] fix non-standard line continuation --- include/mimalloc-internal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index bf59656c..99e4b5ba 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -10,8 +10,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc-types.h" -#if defined(MI_MALLOC_OVERRIDE) && \ - (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) +#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__)) #define MI_TLS_RECURSE_GUARD #endif From a407f35c64321f02dbaf956893ced313ca7e199c Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:01:14 -0800 Subject: [PATCH 059/179] add arena.c into the static override object --- src/arena.c | 52 ++++++++++++++++++++++++++-------------------------- src/static.c | 1 + 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/arena.c b/src/arena.c index 4a596b2c..90ea2b40 100644 --- a/src/arena.c +++ b/src/arena.c @@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). -In contrast to the rest of mimalloc, the arenas are shared between +large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. Currently arenas are only used to for huge OS page (1GiB) reservations, otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas +In the future, we can expose an API to manually add more kinds of arenas which is sometimes needed for embedded devices or shared memory for example. (We can also employ this with WASI or `sbrk` systems to reserve large arenas on demand and be able to reuse them efficiently). @@ -41,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation @@ -82,13 +82,13 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0 // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { +static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { mi_assert_internal(arena_index < 0xFE); mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? 
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); } -static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { +static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { mi_assert_internal(memid != MI_MEMID_OS); *arena_index = (memid & 0xFF) - 1; *bitmap_index = (memid >> 8); @@ -101,7 +101,7 @@ static size_t mi_block_count_of_size(size_t size) { /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) +static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { const size_t fcount = arena->field_count; size_t idx = mi_atomic_read(&arena->search_idx); // start from last search @@ -120,15 +120,15 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_memid_create(arena_index, bitmap_index); + *memid = mi_arena_id_create(arena_index, bitmap_index); *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; if (arena->is_committed) { @@ -152,19 +152,19 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, + bool* commit, bool* large, bool* is_zero, + size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; - + // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - size <= MI_ARENA_MAX_OBJ_SIZE && + if (alignment <= MI_SEGMENT_ALIGN && + size <= MI_ARENA_MAX_OBJ_SIZE && size >= MI_ARENA_MIN_OBJ_SIZE) { const size_t bcount = mi_block_count_of_size(size); @@ -177,7 +177,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { + { void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; @@ -224,7 +224,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { // allocated in an arena size_t arena_idx; size_t bitmap_idx; - mi_memid_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); @@ -254,7 +254,7 @@ static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - + uintptr_t i = mi_atomic_addu(&mi_arena_count,1); if (i >= MI_MAX_ARENAS) { mi_atomic_subu(&mi_arena_count, 1); @@ -283,10 +283,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec return ENOMEM; } _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); - + size_t bcount = mi_block_count_of_size(hsize); size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; - size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); + size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); @@ -294,7 +294,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } arena->block_count = bcount; arena->field_count = fields; - arena->start = (uint8_t*)p; + arena->start = (uint8_t*)p; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = true; arena->is_zero_init = true; @@ -308,9 +308,9 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); - mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - + mi_arena_add(arena); return 0; } @@ -326,7 +326,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs / numa_count) + 50; - + // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 @@ -348,7 +348,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; - int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); + int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } diff --git a/src/static.c b/src/static.c index f1656fa9..d31fca8f 100644 --- a/src/static.c +++ b/src/static.c @@ -15,6 +15,7 @@ terms of the MIT license. A copy of the license can be found in the file // functions (on Unix's). #include "stats.c" #include "os.c" +#include "arena.c" #include "memory.c" #include "segment.c" #include "page.c" From 36d168a2d9880648c697761dbc6ec90211fd7b8b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 1 Dec 2019 00:03:35 -0800 Subject: [PATCH 060/179] add preload check to options initialization --- src/options.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index c8df29a8..0d3bd393 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. 
// -------------------------------------------------------- @@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(eager_region_commit) }, { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's @@ -71,7 +71,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; @@ -89,7 +89,7 @@ void _mi_options_init(void) { mi_option_desc_t* desc = &options[option]; _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); } - } + } mi_max_error_count = mi_option_get(mi_option_max_errors); } @@ -98,7 +98,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -142,7 +142,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -184,7 +184,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -340,7 +340,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -366,7 +366,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From e31e609414d047aa198e5e59820a5f96c1a751bc Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 1 Dec 2019 01:03:39 -0800 Subject: [PATCH 061/179] add preload check in option initialization (issues #179) --- src/options.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/options.c b/src/options.c index 0bee74e0..d6b0558b 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. // -------------------------------------------------------- @@ -96,7 +96,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -140,7 +140,7 @@ void mi_option_disable(mi_option_t option) { static void mi_out_stderr(const char* msg) { #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -182,7 +182,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { out_buf[count] = 0; out(out_buf); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } @@ -339,7 +339,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { #include static bool mi_getenv(const char* name, char* result, size_t result_size) { result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); return (len > 0 && len < result_size); } #else @@ -365,7 +365,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { } } #endif -static void mi_option_init(mi_option_desc_t* desc) { +static void mi_option_init(mi_option_desc_t* desc) { + #ifndef _WIN32 + // cannot call getenv() when still initializing the C runtime. 
+ if (_mi_preloading()) return; + #endif // Read option value from the environment char buf[64+1]; mi_strlcpy(buf, "mimalloc_", sizeof(buf)); From f9b942d80d0d51a18bcb12959b3f8f72803a981d Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:08:46 -0800 Subject: [PATCH 062/179] fix compilation of region descriptor on 32-bit --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; From ba87a39d9fcfab97fce28c16c7e1c799ee6af524 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:07:01 -0800 Subject: [PATCH 063/179] updated random cookie generation using OS primitives and chacha20 --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 3 + ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 35 ++- include/mimalloc-types.h | 11 +- src/heap.c | 14 +- src/init.c | 77 +---- src/memory.c | 2 +- src/os.c | 8 +- src/page.c | 14 +- src/random.c | 290 +++++++++++++++++++ src/static.c | 1 + 18 files changed, 378 insertions(+), 97 deletions(-) create mode 100644 src/random.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c4480b89..a894de9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake") set(mi_sources src/stats.c + src/random.c src/os.c src/arena.c src/memory.c @@ -115,7 +116,7 @@ endif() # extra needed libraries if(WIN32) - list(APPEND mi_libraries psapi shell32 user32) + list(APPEND mi_libraries psapi shell32 user32 bcrypt) else() list(APPEND mi_libraries pthread) find_library(LIBRT rt) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 1fc70b33..821645e9 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -129,7 +129,7 @@ Default - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) @@ -195,7 +195,7 @@ true true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) Default @@ -244,6 +244,7 @@ true + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 75a8e032..037fbcbb 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -73,5 +73,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 484c4db8..01c6ad27 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -229,6 +229,7 @@ true + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 598b8643..5fe74aa0 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -56,6 +56,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 49f3d213..6ac6541d 100644 --- 
a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -247,6 +247,7 @@ true + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index b2dea4e1..a8c5a5de 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index bae49bab..1860f26a 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -232,6 +232,7 @@ true + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 0cce0c4f..61de4afe 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 99e4b5ba..e648c1ff 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; -// "init.c" +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c extern mi_stats_t _mi_stats_main; extern const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -uintptr_t _mi_random_shuffle(uintptr_t x); -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); bool _mi_preloading(); // true while the C runtime is not ready // os.c @@ -100,7 +105,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); -uintptr_t _mi_heap_random(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" @@ -454,6 +458,29 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + mi_assert_internal(x!=0); +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} // ------------------------------------------------------------------- // Optimize numa node access for the common case (= one node) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f79c5a64..1360c125 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1<random; - heap->random = _mi_random_shuffle(r); - return r; -} - mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); @@ -197,12 +191,16 @@ mi_heap_t* mi_heap_new(void) { memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; - heap->random = _mi_heap_random(bheap); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/init.c b/src/init.c index d8fff823..768bc2bf 100644 --- a/src/init.c +++ b/src/init.c @@ -85,7 +85,7 @@ const mi_heap_t _mi_heap_empty = { ATOMIC_VAR_INIT(NULL), 0, 0, - 0, + { {0}, {0}, 0 }, 0, false }; @@ -116,7 +116,7 @@ mi_heap_t _mi_heap_main = { #else 0xCDCDCDCDUL, #endif - 0, // random + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -125,66 +125,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; -/* ----------------------------------------------------------- - Initialization of random numbers ------------------------------------------------------------ */ - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -uintptr_t _mi_random_shuffle(uintptr_t x) { - #if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; - #elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; - #endif - return x; -} - -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { -#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse - uintptr_t x; - arc4random_buf(&x, sizeof x); -#else - // Hopefully, ASLR makes our function address random - uintptr_t x = (uintptr_t)((void*)&_mi_random_init); - x ^= seed; - // xor with high res time -#if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); -#elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); -#else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; -#endif - // and do a few randomization steps - uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { - x = _mi_random_shuffle(x); - } -#endif - return x; -} /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps @@ -214,8 +154,8 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - heap->random = _mi_random_init(heap->thread_id); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; 
@@ -451,16 +391,15 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = mi_get_default_heap(); + mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; - #ifndef __APPLE__ - _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + _mi_random_init(&_mi_heap_main.random); + #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); #endif - _mi_heap_main.random = _mi_random_shuffle(random); mi_process_setup_auto_thread_done(); _mi_os_init(); #if (MI_DEBUG) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; diff --git a/src/os.c b/src/os.c index 6cf89c99..9da209ad 100644 --- a/src/os.c +++ b/src/os.c @@ -409,8 +409,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all @@ -909,8 +909,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } end = start + size; diff --git a/src/page.c b/src/page.c index 2992bf09..471dca97 100644 --- a/src/page.c +++ b/src/page.c @@ -475,11 +475,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co // and initialize the free list by randomly threading through them // set up first element - size_t current = _mi_heap_random(heap) % slice_count; + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; counts[current]--; mi_block_t* const free_start = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; + // and iterate through the rest; use 
`random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r); for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; @@ -499,8 +500,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; - heap->random = _mi_random_shuffle(rnd); + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -608,7 +608,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random(heap) | 1; + page->cookie = _mi_heap_random_next(heap) | 1; #endif page->is_zero = page->is_zero_init; @@ -710,7 +710,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); diff --git a/src/random.c b/src/random.c new file mode 100644 index 00000000..063633ff --- /dev/null +++ b/src/random.c @@ -0,0 +1,290 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +/* ---------------------------------------------------------------------------- +We use our own PRNG to keep predictable performance of random number generation +and to avoid implementations that use a lock. We only use the OS provided +random source to initialize the initial seeds. Since we do not need ultimate +performance but we do rely on the security (for secret cookies in secure mode) +we use a cryptographically secure generator (chacha20). +-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. + +The implementation uses regular C code which compiles very well on modern compilers. 
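Editor's note: the position list in the comment above can be pictured as a struct over the sixteen 32-bit words. This is purely illustrative; the patch itself keeps a flat `uint32_t input[16]` array with the same ordering.

```c
#include <stdint.h>

// Illustration of the ChaCha input matrix layout described above
// (same size and order as the flat uint32_t input[16] used in the patch).
typedef struct chacha_state_s {
  uint32_t sigma[4];     // words  0..3 : the constant "expand 32-byte k"
  uint32_t key[8];       // words  4..11: the 256-bit key
  uint32_t counter[2];   // words 12..13: 64-bit block counter
  uint32_t nonce[2];     // words 14..15: 64-bit nonce
} chacha_state_t;
```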
+(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* r) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = r->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + r->output[i] = x[i] + r->input[i]; + } + r->output_available = 16; + + // increment the counter for the next round + r->input[12] += 1; + if (r->input[12] == 0) { + r->input[13] += 1; + if (r->input[13] == 0) { // and keep increasing into the nonce + r->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* r) { + if (r->output_available <= 0) { + chacha_block(r); + r->output_available = 16; // (assign again to suppress static analysis warning) + } + r->output_available--; + const uint32_t x = r->output[r->output_available]; + r->output[r->output_available] = 0; // reset once the data is handed out + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); +} + +static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(r, 0, sizeof(*r)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + r->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + r->input[i + 4] = read32(key,i); + } + r->input[12] = 0; + r->input[13] = 0; + r->input[14] = (uint32_t)nonce; + r->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { + memset(init, 0, sizeof(*init)); + memcpy(init->input, r->input, sizeof(init->input)); + init->input[12] = 0; + init->input[13] = 0; + init->input[14] = (uint32_t)nonce; + init->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! 
+ chacha_block(init); +} + + +/* ---------------------------------------------------------------------------- +Random interface +-----------------------------------------------------------------------------*/ + +#if MI_DEBUG>1 +static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { + return (ctx != NULL && ctx->input[0] != 0); +} +#endif + +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + mi_assert_internal(ctx != new_ctx); + chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); +} + +uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + #if MI_INTPTR_SIZE <= 4 + return chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif +} + + +/* ---------------------------------------------------------------------------- +To initialize a fresh random context we rely on the OS: +- windows: BCryptGenRandom +- bsd,wasi: arc4random_buf +- linux: getrandom +If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. +-----------------------------------------------------------------------------*/ + +#if defined(_WIN32) +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} +/* +#define SystemFunction036 NTAPI SystemFunction036 +#include +#undef SystemFunction036 +static bool os_random_buf(void* buf, size_t buf_len) { + RtlGenRandom(buf, (ULONG)buf_len); + return true; +} +*/ +#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__wasi__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} +#elif defined(__linux__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); +} +#else +static bool os_random_buf(void* buf, size_t buf_len) { + return false; +} +#endif + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +static uintptr_t os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random + #if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); + #elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); + #else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; + #endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } + mi_assert_internal(x != 0); + return x; +} + +void _mi_random_init(mi_random_ctx_t* ctx) { + uint8_t key[32]; + if (!os_random_buf(key, sizeof(key))) { + // if we fail to get random data from the OS, we fall back to a + // weak random source based on the current time + uintptr_t x = os_random_weak(0); + for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
+ _mi_warning_message("unable to use secure randomness\n"); + x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ \ No newline at end of file diff --git a/src/static.c b/src/static.c index d31fca8f..0519453e 100644 --- a/src/static.c +++ b/src/static.c @@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // it will override all the standard library allocation // functions (on Unix's). #include "stats.c" +#include "random.c" #include "os.c" #include "arena.c" #include "memory.c" From e05a1edc038477574ee5c1e4ea00f0a7b9ab9e67 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 24 Dec 2019 10:32:44 -0800 Subject: [PATCH 064/179] fix large OS page size on Linux (issue #184, due to fix for #179) --- src/os.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index 8f5afc5b..54088f83 100644 --- a/src/os.c +++ b/src/os.c @@ -171,9 +171,7 @@ void _mi_os_init() { os_page_size = (size_t)result; os_alloc_granularity = os_page_size; } - if (mi_option_is_enabled(mi_option_large_os_pages)) { - large_os_page_size = (1UL << 21); // 2MiB - } + large_os_page_size = 2*MiB; // TODO: can we query the OS for this? } #endif From 49acc88924c7afd6a00c0836231e8923769fbe26 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 24 Dec 2019 10:38:13 -0800 Subject: [PATCH 065/179] Update readme.md --- readme.md | 1 + 1 file changed, 1 insertion(+) diff --git a/readme.md b/readme.md index 9d3974c9..32332c08 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,7 @@ Enjoy! ### Releases +* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. 
* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. From ce02986d56cb69dd2f2d2b1a5c25260338665957 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 22:30:23 -0800 Subject: [PATCH 066/179] variable renaming --- src/random.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/random.c b/src/random.c index 063633ff..43e7dd5c 100644 --- a/src/random.c +++ b/src/random.c @@ -44,12 +44,12 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } -static void chacha_block(mi_random_ctx_t* r) +static void chacha_block(mi_random_ctx_t* ctx) { // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { - x[i] = r->input[i]; + x[i] = ctx->input[i]; } for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { qround(x, 0, 4, 8, 12); @@ -64,28 +64,28 @@ static void chacha_block(mi_random_ctx_t* r) // add scrambled data to the initial state for (size_t i = 0; i < 16; i++) { - r->output[i] = x[i] + r->input[i]; + ctx->output[i] = x[i] + ctx->input[i]; } - r->output_available = 16; + ctx->output_available = 16; // increment the counter for the next round - r->input[12] += 1; - if (r->input[12] == 0) { - r->input[13] += 1; - if (r->input[13] == 0) { // and keep increasing into the nonce - r->input[14] += 1; + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; } } } -static uint32_t chacha_next32(mi_random_ctx_t* r) { - if (r->output_available <= 0) { - chacha_block(r); - r->output_available = 16; // (assign again to suppress static analysis warning) +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) } - r->output_available--; - const uint32_t x = r->output[r->output_available]; - r->output[r->output_available] = 0; // reset once the data is handed out + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; return x; } @@ -94,34 +94,34 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) { return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } -static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) - memset(r, 0, sizeof(*r)); + memset(ctx, 0, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; - r->input[i] = read32(sigma,i); + ctx->input[i] = read32(sigma,i); } for (size_t i = 0; i < 8; i++) { - r->input[i + 4] = read32(key,i); + ctx->input[i + 4] = read32(key,i); } - r->input[12] = 0; - r->input[13] = 0; - r->input[14] = (uint32_t)nonce; - r->input[15] = (uint32_t)(nonce >> 32); + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); } -static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { - memset(init, 0, sizeof(*init)); - 
memcpy(init->input, r->input, sizeof(init->input)); - init->input[12] = 0; - init->input[13] = 0; - init->input[14] = (uint32_t)nonce; - init->input[15] = (uint32_t)(nonce >> 32); - mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! - chacha_block(init); +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! + chacha_block(ctx_new); } @@ -135,10 +135,10 @@ static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { } #endif -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { mi_assert_internal(mi_random_is_initialized(ctx)); - mi_assert_internal(ctx != new_ctx); - chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); + mi_assert_internal(ctx != ctx_new); + chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); } uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { From e3391d9a53c66f922c6e0ac12df4723701a05110 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 23:33:50 -0800 Subject: [PATCH 067/179] stronger encoding of free lists using two keys per page --- include/mimalloc-internal.h | 58 +++++++++++++++++++++++++------------ include/mimalloc-types.h | 17 ++++++----- src/alloc.c | 8 ++--- src/heap.c | 2 ++ src/init.c | 30 ++++++++++++------- src/page.c | 14 ++++----- src/random.c | 2 +- src/segment.c | 2 +- 8 files changed, 83 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e648c1ff..cdaac963 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -392,12 +392,28 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { } -// ------------------------------------------------------------------- -// Encoding/Decoding the free list next pointers -// Note: we pass a `null` value to be used as the `NULL` value for the -// end of a free list. This is to prevent the cookie itself to ever -// be present among user blocks (as `cookie^0==cookie`). -// ------------------------------------------------------------------- +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`, but if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. +Since these operations are not associative, the above approaches do not +work so well any more even if the `p` can be guesstimated. (We include +the rotation since xor and addition are otherwise linear in the lowest bit) +Both keys are unique per page. + +We also pass a separate `null` value to be used as `NULL` or otherwise +`rotl(k2,13)^k1` would appear (too) often as a sentinel value. 
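Editor's note: to make the algebra above concrete, here is a small self-contained round-trip check for the encoding this patch defines a few lines further down (`mi_block_set_nextx` encodes as `rotl(p + k2, 13) ^ k1`, `mi_block_nextx` decodes as `rotr(c ^ k1, 13) - k2`, with `MI_ENCODE_ROTATE_BITS` equal to 13). A later patch in this series revises the exact formula, but the round-trip property is the same. The keys and pointer value below are arbitrary examples.

```c
#include <assert.h>
#include <stdint.h>

#define BITS ((unsigned)(sizeof(uintptr_t)*8))
#define ROT  13u   // MI_ENCODE_ROTATE_BITS in this patch

static uintptr_t rotl(uintptr_t x, unsigned s) { return (x << s) | (x >> (BITS - s)); }
static uintptr_t rotr(uintptr_t x, unsigned s) { return (x >> s) | (x << (BITS - s)); }

// encode/decode as in mi_block_set_nextx / mi_block_nextx of this patch
static uintptr_t enc(uintptr_t p, uintptr_t k1, uintptr_t k2) { return rotl(p + k2, ROT) ^ k1; }
static uintptr_t dec(uintptr_t c, uintptr_t k1, uintptr_t k2) { return rotr(c ^ k1, ROT) - k2; }

int main(void) {
  const uintptr_t k1 = (uintptr_t)0x9e3779b97f4a7c15u;  // example key 1
  const uintptr_t k2 = (uintptr_t)0xbf58476d1ce4e5b9u;  // example key 2
  const uintptr_t p  = (uintptr_t)0x00007f3a12345670u;  // example "pointer"
  assert(dec(enc(p, k1, k2), k1, k2) == p);   // rotation, xor and addition all invert cleanly
  assert(enc(p, k1, k2) != (p ^ k1));         // not a plain xor: the addition and rotation mix bits first
  return 0;
}
```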
+------------------------------------------------------------------- */ + +#define MI_ENCODE_ROTATE_BITS (13) static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); @@ -412,49 +428,55 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { return (idxp == idxq); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { + return ((x << shift) | (x >> (MI_INTPTR_BITS - shift))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); +} +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = (mi_encoded_t)next ^ cookie; + block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->cookie); - // check for free list corruption: is `next` at least in our segment range? + mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
- if (next!=NULL && !mi_is_in_same_page(block, next)) { + if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(page,block,0); + return mi_block_nextx(page,block,0,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->cookie); + mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0); + mi_block_set_nextx(page,block, next,0,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1360c125..ab7d7c53 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -191,7 +191,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t cookie; // random cookie to encode the free lists + uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) @@ -206,9 +206,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) - void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain + // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==4) + void* padding[1]; // 12/14 words on 32-bit plain #endif } mi_page_t; @@ -239,8 +239,8 @@ typedef struct mi_segment_s { size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -289,8 +289,9 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too - uintptr_t cookie; - mi_random_ctx_t random; // random number used for secure allocation + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. 
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages }; diff --git a/src/alloc.c b/src/alloc.c index e68b48d2..714acc76 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap,block,dfree, heap->cookie); + mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -266,7 +266,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly - if (mi_check_is_double_free(page, block)) return; + if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,7 +341,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_check_is_double_free(page,block)) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/heap.c b/src/heap.c index 6d6948df..f90c4624 100644 --- a/src/heap.c +++ b/src/heap.c @@ -193,6 +193,8 @@ mi_heap_t* mi_heap_new(void) { heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } diff --git a/src/init.c b/src/init.c index 768bc2bf..cadcd2a3 100644 --- a/src/init.c +++ b/src/init.c @@ -16,13 +16,13 @@ const mi_page_t _mi_page_empty = { { 0 }, false, NULL, // free #if MI_ENCODE_FREELIST - 0, + { 0, 0 }, #endif 0, // used NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + #if (MI_INTPTR_SIZE==4) , { NULL } // padding #endif }; @@ -83,8 +83,9 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, - 0, + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, 0, false @@ -105,18 +106,21 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; +#if MI_INTPTR_SIZE==8 +#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) +#else +#define MI_INIT_COOKIE (0xCDCDCDCDUL) +#endif + mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, NULL, - 0, // 
thread id -#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) - 0xCDCDCDCDCDCDCDCDUL, -#else - 0xCDCDCDCDUL, -#endif - { {0}, {0}, 0 }, // random + 0, // thread id + MI_INIT_COOKIE, // initial cookie + { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -156,6 +160,8 @@ static bool _mi_heap_init(void) { heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; @@ -399,6 +405,8 @@ void mi_process_init(void) mi_attr_noexcept { _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 471dca97..901fbda1 100644 --- a/src/page.c +++ b/src/page.c @@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); @@ -284,7 +284,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -292,9 +292,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap, block, dfree, heap->cookie); + mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); - } block = next; } @@ -357,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -608,7 +607,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random_next(heap) | 1; + page->key[0] = _mi_heap_random_next(heap); + page->key[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -621,7 +621,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); diff --git a/src/random.c b/src/random.c index 43e7dd5c..af6cd876 100644 --- a/src/random.c +++ b/src/random.c @@ -231,9 +231,9 @@ void _mi_random_init(mi_random_ctx_t* ctx) { if (!os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time + _mi_warning_message("unable to use secure randomness\n"); uintptr_t x = os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. - _mi_warning_message("unable to use secure randomness\n"); x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } diff --git a/src/segment.c b/src/segment.c index f6ce939b..bbe88f82 100644 --- a/src/segment.c +++ b/src/segment.c @@ -520,7 +520,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection From 77134e1ad072aa3bf3fd5e225f58ae88b48db589 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:17:49 -0800 Subject: [PATCH 068/179] update free list encoding to stronger formula with addition last --- include/mimalloc-internal.h | 29 +++++++++++++++++------------ src/page.c | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cdaac963..d41dfadc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -397,24 +397,26 @@ Encoding/Decoding the free list next pointers This is to protect against buffer overflow exploits where the free list is mutated. Many hardened allocators xor the next pointer `p` -with a secret key `k1`, as `p^k1`, but if the attacker can guess +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). -Moreover, if multiple blocks can be read, the attacker can +Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). -Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. 
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } + static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else @@ -448,7 +453,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; + block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; #else UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; @@ -485,7 +490,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline uintptr_t _mi_random_shuffle(uintptr_t x) { - mi_assert_internal(x!=0); + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros #if (MI_INTPTR_SIZE==8) // by Sebastiano Vigna, see: x ^= x >> 30; diff --git a/src/page.c b/src/page.c index 901fbda1..b070e56a 100644 --- a/src/page.c +++ b/src/page.c @@ -479,7 +479,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co counts[current]--; mi_block_t* const free_start = blocks[current]; // and iterate through the rest; use `random_shuffle` for performance - uintptr_t rnd = _mi_random_shuffle(r); + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; From fc3e537bd4ac6d9ffec0243ec595ed15ca1649b8 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:28:13 -0800 Subject: [PATCH 069/179] improve double free detection with faster same page check --- include/mimalloc-types.h | 2 +- src/alloc.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ab7d7c53..76539bd6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/alloc.c b/src/alloc.c index 714acc76..82d97786 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -140,28 +140,24 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons } static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { - size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); - if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { - // Suspicious: the decoded value is in the same page (or NULL). 
- // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) - { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); - return true; - } + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; } return false; } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { - // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) return mi_check_is_double_freex(page, block, n); } From 1b5a08cd25ee0034942df3d5f67dab2d891ba3c1 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:24:32 -0800 Subject: [PATCH 070/179] remove unused parameter in check double free --- src/segment.c | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/segment.c b/src/segment.c index bbe88f82..676df00a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -184,7 +184,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_segment_protect_range(start, os_page_size, protect); } else { - // or protect every page + // or protect every page const size_t page_size = mi_segment_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { if (segment->pages[i].is_committed) { @@ -215,8 +215,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m } static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) -{ - mi_assert_internal(page->is_reset); +{ + mi_assert_internal(page->is_reset); mi_assert_internal(!segment->mem_is_fixed); page->is_reset = false; size_t psize; @@ -276,14 +276,14 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } mi_assert_internal((uintptr_t)p % block_size == 0); } - + if (page_size != NULL) *page_size = psize; mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 
/* padding */; size_t guardsize = 0; @@ -331,16 +331,16 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_assert_internal(!segment->mem_is_fixed); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - + bool any_reset = false; bool fully_committed = true; for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; + mi_page_t* page = &segment->pages[i]; if (!page->is_committed) { fully_committed = false; } if (page->is_reset) { any_reset = true; } } - if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { - fully_committed = false; + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + fully_committed = false; } if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { fully_committed = false; @@ -366,13 +366,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t return segment; } -static bool mi_segment_cache_full(mi_segments_tld_t* tld) +static bool mi_segment_cache_full(mi_segments_tld_t* tld) { // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { + ) { return false; } // take the opportunity to reduce the segment cache if it is too large (now) @@ -387,7 +387,7 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); + mi_assert_internal(segment->next == NULL); if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { return false; } @@ -434,21 +434,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - + // Initialize parameters - bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); bool pages_still_good = false; bool is_zero = false; - + // Try to get it from our thread local cache first mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; } - else + else { if (MI_SECURE!=0) { mi_assert_internal(!segment->mem_is_fixed); @@ -458,7 +458,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, // TODO: optimize cache pop to return fitting pages if possible? 
for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { + if (page->is_reset) { if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { page->is_reset = false; } @@ -473,12 +473,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } - } + } } else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { @@ -489,12 +489,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } segment->memid = memid; segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - if (!pages_still_good) { + if (!pages_still_good) { // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); memset((uint8_t*)segment + ofs, 0, info_size - ofs); @@ -520,12 +520,12 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true, tld->os); - + //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } @@ -541,8 +541,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); + if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache } @@ -569,12 +569,12 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; - segment->used++; + segment->used++; if (!page->is_committed) { mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!page->is_reset); page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || mi_option_is_enabled(mi_option_eager_page_commit)) { + if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -586,7 +586,7 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* } if (page->is_reset) { mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
- } + } return page; } } @@ -608,7 +608,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - + // calculate the used size from the raw (non-aligned) start of the page //size_t pre_size; //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); @@ -621,7 +621,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // note: must come after setting `segment_in_use` to false but before block_size becomes 0 mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); segment->used--; @@ -674,7 +674,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { // remove the segment from the free page queue if needed mi_segment_remove_from_free_queue(segment,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - + // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); @@ -691,7 +691,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); - segment->abandoned++; + segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { @@ -744,7 +744,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it _mi_page_reclaim(heap,page); } } @@ -774,7 +774,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); + mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { // if no more free pages, remove from the queue @@ -813,7 +813,7 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); - if (segment == NULL) return NULL; + if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 From 9629a0190f5eac495936e0b0970b4343c6abb975 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:25:00 -0800 Subject: [PATCH 071/179] fix eager commit on large pages (issue #182) --- src/alloc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 82d97786..8ee78338 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept { // ------------------------------------------------------ -// Check 
for double free in secure and debug mode +// Check for double free in secure and debug mode // This is somewhat expensive so only enabled for secure mode 4 // ------------------------------------------------------ @@ -139,12 +139,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons return false; } -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; @@ -156,11 +156,11 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { + { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block, n); - } + return mi_check_is_double_freex(page, block); + } return false; } #else @@ -337,7 +337,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_unlikely(mi_check_is_double_free(page,block))) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; From f9ca88f71cbc3f43601ddedd6547f3a85c865bb5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 2 Jan 2020 17:57:41 -0800 Subject: [PATCH 072/179] set secure default to 0 again --- include/mimalloc-types.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 76539bd6..d334489c 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 4 +#define MI_SECURE 0 #endif // Define MI_DEBUG for debug mode @@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1) +#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif @@ -109,8 +109,8 @@ terms of the MIT license. 
A copy of the license can be found in the file // (Except for large pages since huge objects are allocated in 4MiB chunks) #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb -#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb -#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) +#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb +#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) #define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Minimal alignment necessary. On most platforms 16 bytes are needed @@ -143,14 +143,14 @@ typedef enum mi_delayed_e { } mi_delayed_t; -// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine. typedef union mi_page_flags_s { uint8_t full_aligned; struct { uint8_t in_full : 1; uint8_t has_aligned : 1; - } x; + } x; } mi_page_flags_t; // Thread free list. @@ -182,7 +182,7 @@ typedef struct mi_page_s { uint8_t is_reset:1; // `true` if the page memory was reset uint8_t is_committed:1; // `true` if the page virtual memory is committed uint8_t is_zero_init:1; // `true` if the page was zero initialized - + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory @@ -194,7 +194,7 @@ typedef struct mi_page_s { uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads @@ -227,7 +227,7 @@ typedef enum mi_page_kind_e { typedef struct mi_segment_s { // memory fields size_t memid; // id for the os-level memory manager - bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields @@ -240,7 +240,7 @@ typedef struct mi_segment_s { size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` - + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment From eeb623e6af4d00d96a147a0d782298c5e4db987d Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 17:06:41 -0800 Subject: [PATCH 073/179] increase retire limit, collect retired pages --- include/mimalloc-types.h | 3 ++- src/init.c | 28 ++++++++++++--------- src/page.c | 54 +++++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d334489c..68529c3f 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -187,7 +187,8 @@ typedef struct mi_page_s { uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - bool is_zero; // `true` if the blocks in the free list are zero initialized + uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST diff --git a/src/init.c b/src/init.c index cadcd2a3..3df854cf 100644 --- a/src/init.c +++ b/src/init.c @@ -12,8 +12,12 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, 0, 0, - { 0 }, false, + 0, false, false, false, false, + 0, // capacity + 0, // reserved capacity + { 0 }, // flags + false, // is_zero + 0, // retire_expire NULL, // free #if MI_ENCODE_FREELIST { 0, 0 }, @@ -83,11 +87,11 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, // tid - 0, // cookie - { 0, 0 }, // keys + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, - 0, + 0, // page count false }; @@ -106,7 +110,7 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; -#if MI_INTPTR_SIZE==8 +#if MI_INTPTR_SIZE==8 #define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) #else #define MI_INIT_COOKIE (0xCDCDCDCDUL) @@ -121,8 +125,8 @@ mi_heap_t _mi_heap_main = { MI_INIT_COOKIE, // initial cookie { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0}, {0}, 0 }, // random - 0, // page count - false // can reclaim + 0, // page count + false // can reclaim }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
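A note on the field change above: replacing the plain `bool is_zero` with the `is_zero:1`/`retire_expire:7` bit-field pair keeps both in a single byte, so `mi_page_t` gains a 7-bit retirement counter (values 0..127) without growing. A minimal standalone sketch of the packing (illustrative only, not mimalloc code):

#include <stdint.h>
#include <stdio.h>

typedef struct page_bits_s {
  uint8_t is_zero       : 1;   // blocks in the free list are zero initialized
  uint8_t retire_expire : 7;   // countdown before a fully-free page is really freed
} page_bits_t;

int main(void) {
  page_bits_t b = { 0, 0 };
  b.retire_expire = 4;         // the expiration value used later in this series
  printf("size=%zu expire=%u\n", sizeof(b), (unsigned)b.retire_expire);
  return 0;
}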
@@ -136,7 +140,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; + mi_tld_t tld; } mi_thread_data_t; // Initialize the thread local default heap, called from `mi_thread_init` @@ -158,7 +162,7 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); + _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); @@ -402,7 +406,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - _mi_random_init(&_mi_heap_main.random); + _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); diff --git a/src/page.c b/src/page.c index b070e56a..f5f51a72 100644 --- a/src/page.c +++ b/src/page.c @@ -229,7 +229,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(!page->is_reset); + mi_assert_internal(!page->is_reset); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); @@ -342,7 +342,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - + #if MI_DEBUG > 1 mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); #endif @@ -392,7 +392,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } } - + // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) mi_segments_tld_t* segments_tld = &page->heap->tld->segments; @@ -420,20 +420,40 @@ void _mi_page_retire(mi_page_t* page) { // (or we end up retiring and re-allocating most of the time) // NOTE: refine this more: we should not retire if this // is the only page left with free blocks. It is not clear - // how to check this efficiently though... + // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { - // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { - if (pq->last==page && pq->first==page) { + if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - return; // dont't retire after all + page->retire_expire = 2; + mi_assert_internal(mi_page_all_free(page)); + return; // dont't free after all } } _mi_page_free(page, pq, false); } +// free retired pages: we don't need to look at the entire queues +// since we only retire pages that are the last one in a queue. 
+static void mi_page_retired_collect(mi_heap_t* heap) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (page->retire_expire == 0) { + _mi_page_free(pq->first, pq, false); + } + } + else { + page->retire_expire = 0; + } + } + } +} + /* ----------------------------------------------------------- Initialize the initial free list in a page. @@ -499,7 +519,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -513,15 +533,15 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); const size_t bsize = page->block_size; mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); - + // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; - } + } // prepend to free list (usually `NULL`) mi_block_set_next(page, last, page->free); page->free = start; @@ -619,6 +639,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->thread_freed == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) mi_assert_internal(page->key != 0); @@ -699,8 +720,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } else { mi_assert(pq->first == page); + page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + + // finally collect retired pages + mi_page_retired_collect(heap); return page; } @@ -719,6 +744,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { _mi_page_free_collect(page,false); } if (mi_page_immediate_available(page)) { + page->retire_expire = 0; return page; // fast path } } @@ -759,7 +785,7 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { // that frees the block can free the whole page and segment directly. 
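The retirement changes in this patch mainly target allocation patterns like the following hypothetical program (using only the public `mi_malloc`/`mi_free` API): when the only page of a size class repeatedly becomes completely free, the `retire_expire` countdown keeps it alive for a few allocation rounds instead of returning it to the segment on every free and re-allocating it on the next malloc.

#include <mimalloc.h>

int main(void) {
  // drain and refill a single small size class over and over; without the
  // retire expiration the page holding these blocks could be freed and then
  // re-allocated on every iteration
  for (int i = 0; i < 1000000; i++) {
    void* p = mi_malloc(64);
    mi_free(p);
  }
  return 0;
}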
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); + mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); @@ -777,7 +803,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { _mi_stat_increase(&heap->tld->stats.huge, block_size); _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); } - } + } return page; } From 2b108c8748410b81ca239c4f6a3639845d135587 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 21:39:18 -0800 Subject: [PATCH 074/179] increase retire expiration to 4 --- include/mimalloc-internal.h | 1 + src/heap.c | 5 +++-- src/page.c | 10 +++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d41dfadc..cfbd9782 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -90,6 +90,7 @@ void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); diff --git a/src/heap.c b/src/heap.c index f90c4624..963cb982 100644 --- a/src/heap.c +++ b/src/heap.c @@ -46,7 +46,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void #if MI_DEBUG>=3 -static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { +static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); UNUSED(pq); @@ -59,7 +59,7 @@ static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); - mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); + mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); return true; } #endif @@ -84,6 +84,7 @@ typedef enum mi_collect_e { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { UNUSED(arg2); UNUSED(heap); + mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= ABANDON); if (mi_page_all_free(page)) { diff --git a/src/page.c b/src/page.c index f5f51a72..b0b500ca 100644 --- a/src/page.c +++ b/src/page.c @@ -426,7 +426,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 2; + page->retire_expire = 4; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } @@ -437,14 +437,14 @@ void _mi_page_retire(mi_page_t* page) { // free retired pages: we don't need to look at the entire queues // since we only retire pages that are the last one in a queue. 
-static void mi_page_retired_collect(mi_heap_t* heap) { +void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) { mi_page_t* page = pq->first; if (page != NULL && page->retire_expire != 0) { if (mi_page_all_free(page)) { page->retire_expire--; - if (page->retire_expire == 0) { - _mi_page_free(pq->first, pq, false); + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq, force); } } else { @@ -725,7 +725,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - mi_page_retired_collect(heap); + _mi_heap_collect_retired(heap,false); return page; } From d596f0856930a885007088ff52db8db051963da0 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:06:27 -0800 Subject: [PATCH 075/179] fix thread_free read in assertion --- src/alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 8ee78338..bd81aba0 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -142,9 +142,10 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). // Walk the free lists to verify positively if it is already freed + mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + mi_list_contains(page, mi_tf_block(tf), block)) { _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); return true; From a2a9230ad6e404e23a724fa8c820e3533a961716 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 3 Jan 2020 22:52:52 -0800 Subject: [PATCH 076/179] remove empty page removal on page search (no longer needed with retired collection and delayed freeing) --- src/page.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/page.c b/src/page.c index b0b500ca..c38d7740 100644 --- a/src/page.c +++ b/src/page.c @@ -660,9 +660,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -674,20 +672,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. - // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. 
Try to extend @@ -707,14 +692,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } - if (page == NULL) { page = mi_page_fresh(heap, pq); } From 59fa2862941fe6c07c526d2221e2557492b3b1ab Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 4 Jan 2020 17:32:50 -0800 Subject: [PATCH 077/179] fix bug where continue would wrongly exit the do-while loop for delayed freeing --- src/page.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/page.c b/src/page.c index c38d7740..0df32f4c 100644 --- a/src/page.c +++ b/src/page.c @@ -119,23 +119,22 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { mi_thread_free_t tfree; mi_thread_free_t tfreex; - + mi_delayed_t old_delay; do { - tfreex = tfree = page->thread_free; - if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { - tfreex = mi_tf_set_delayed(tfree,delay); - } - else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { + tfree = mi_atomic_read_relaxed(&page->thread_free); + tfreex = mi_tf_set_delayed(tfree, delay); + old_delay = mi_tf_delayed(tfree); + if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. - continue; // and try again } - } - while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + else if (delay == old_delay) { + break; // avoid atomic operation if already equal + } + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } From 45582d1fb5e076a334fb9c5fd704da9b7312dc5b Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 13:58:49 -0800 Subject: [PATCH 078/179] revert a2a9230 (remove empty page removal on search): this is not generally valid when concurrent frees do not always add to thread_delayed_free. --- src/page.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 0df32f4c..78570ab0 100644 --- a/src/page.c +++ b/src/page.c @@ -659,7 +659,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order + mi_page_t* rpage = NULL; size_t count = 0; + size_t page_free_count = 0; mi_page_t* page = pq->first; while( page != NULL) { @@ -671,7 +673,20 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - break; // pick this one + // If all blocks are free, we might retire this page instead. + // do this at most 8 times to bound allocation time. 
+ // (note: this can happen if a page was earlier not retired due + // to having neighbours that were mostly full or due to concurrent frees) + if (page_free_count < 8 && mi_page_all_free(page)) { + page_free_count++; + if (rpage != NULL) _mi_page_free(rpage,pq,false); + rpage = page; + page = next; + continue; // and keep looking + } + else { + break; // pick this one + } } // 2. Try to extend @@ -691,6 +706,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches,count); + if (page == NULL) { + page = rpage; + rpage = NULL; + } + if (rpage != NULL) { + _mi_page_free(rpage,pq,false); + } + if (page == NULL) { page = mi_page_fresh(heap, pq); } From d8d69c2c94d0314e546f91bae8f19826aedf1e14 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 5 Jan 2020 22:07:16 -0800 Subject: [PATCH 079/179] disable MAP_NORESERVE on huge pages --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d7126e70..c9a04d27 100644 --- a/src/os.c +++ b/src/os.c @@ -331,7 +331,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { - int lflags = flags; + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lfd = fd; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; From 743e89173819a9fe3283fb94f4f6830d2f648186 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 6 Jan 2020 16:18:22 -0800 Subject: [PATCH 080/179] add stl mimalloc wrapper --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 3 +- ide/vs2017/mimalloc.vcxproj | 3 +- ide/vs2019/mimalloc-override.vcxproj | 3 +- ide/vs2019/mimalloc.vcxproj | 3 +- include/mimalloc-stl-allocator.h | 44 ++++++++++++++++++++++++++++ test/test-api.c | 31 ++++++++++++++++++++ 7 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 include/mimalloc-stl-allocator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 467fad95..dcbdefef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,6 +187,7 @@ install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_dir}) install(FILES include/mimalloc.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-override.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_dir}/include) +install(FILES include/mimalloc-stl-allocator.h DESTINATION ${mi_install_dir}/include) install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_dir}/cmake) install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_dir}/cmake) install(EXPORT mimalloc DESTINATION ${mi_install_dir}/cmake) @@ -233,7 +234,7 @@ if (MI_BUILD_TESTS MATCHES "ON") target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress PRIVATE include) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries}) - + enable_testing() add_test(test_api, mimalloc-test-api) add_test(test_stress, mimalloc-test-stress) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 511c0fab..e0a6d85b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -1,4 +1,4 @@ - + @@ -214,6 +214,7 @@ + diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 6147c349..ff6c8edb 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ 
b/ide/vs2017/mimalloc.vcxproj @@ -1,4 +1,4 @@ - + @@ -239,6 +239,7 @@ + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 96a8924f..e6416e05 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -1,4 +1,4 @@ - + @@ -214,6 +214,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 28e96d71..ffede6ca 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -1,4 +1,4 @@ - + @@ -239,6 +239,7 @@ + diff --git a/include/mimalloc-stl-allocator.h b/include/mimalloc-stl-allocator.h new file mode 100644 index 00000000..11ba30fb --- /dev/null +++ b/include/mimalloc-stl-allocator.h @@ -0,0 +1,44 @@ +#pragma once +#ifndef MIMALLOC_STL_ALLOCATOR_H +#define MIMALLOC_STL_ALLOCATOR_H + +#ifdef __cplusplus +/* ---------------------------------------------------------------------------- +This header can be used to hook mimalloc into STL containers in place of +std::allocator. +-----------------------------------------------------------------------------*/ +#include +#include // true_type + +#pragma warning(disable: 4100) + +template +struct mi_stl_allocator { + typedef T value_type; + + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + using is_always_equal = std::true_type; + + mi_stl_allocator() noexcept {} + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + template + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + + T* allocate(size_t n, const void* hint = 0) { + return (T*)mi_mallocn(n, sizeof(T)); + } + + void deallocate(T* p, size_t n) { + mi_free(p); + } +}; + +template +bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } +template +bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } + +#endif // __cplusplus +#endif // MIMALLOC_STL_ALLOCATOR_H diff --git a/test/test-api.c b/test/test-api.c index bd2291da..7a9ee785 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -25,8 +25,10 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include #include #include +#include #include "mimalloc.h" #include "mimalloc-internal.h" +#include "mimalloc-stl-allocator.h" // --------------------------------------------------------------------------- // Test macros: CHECK(name,predicate) and CHECK_BODY(name,body) @@ -61,6 +63,8 @@ static int failed = 0; // --------------------------------------------------------------------------- bool test_heap1(); bool test_heap2(); +bool test_stl_allocator1(); +bool test_stl_allocator2(); // --------------------------------------------------------------------------- // Main testing @@ -150,6 +154,9 @@ int main() { mi_free(s); }); + CHECK("stl_allocator1", test_stl_allocator1()); + CHECK("stl_allocator2", test_stl_allocator2()); + // --------------------------------------------------- // Done // ---------------------------------------------------[] @@ -182,3 +189,27 @@ bool test_heap2() { mi_free(p2); return true; } + +bool test_stl_allocator1() { +#ifdef __cplusplus + std::vector> vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +#else + return true; +#endif +} + +bool test_stl_allocator2() { +#ifdef __cplusplus + struct some_struct { int i; int j; double z; }; + + std::vector> vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +#else + return true; +#endif +} From d97c56d4c10d9161d7d5b8bec43f67b8f291b67f Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 6 Jan 2020 16:25:21 -0800 Subject: [PATCH 081/179] fix unintended diff --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dcbdefef..93560951 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -234,7 +234,7 @@ if (MI_BUILD_TESTS MATCHES "ON") target_compile_options(mimalloc-test-stress PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress PRIVATE include) target_link_libraries(mimalloc-test-stress PRIVATE mimalloc-static ${mi_libraries}) - + enable_testing() add_test(test_api, mimalloc-test-api) add_test(test_stress, mimalloc-test-stress) From 0a2520490b951d791ebc9b34e8eae69e65fbeda6 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Mon, 6 Jan 2020 16:44:55 -0800 Subject: [PATCH 082/179] only include vector header when compiling c++ --- test/test-api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test-api.c b/test/test-api.c index 7a9ee785..f93884d0 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -25,7 +25,11 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include #include #include + +#ifdef __cplusplus #include +#endif + #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-stl-allocator.h" From 4223caac0fa95b900f89963d99f7c0d1d03a2217 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 6 Jan 2020 22:08:21 -0800 Subject: [PATCH 083/179] on Linux dynamically detect if getrandom is supported and fall back to /dev/urandom if needed --- src/random.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/random.c b/src/random.c index af6cd876..c40a96da 100644 --- a/src/random.c +++ b/src/random.c @@ -155,9 +155,9 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- To initialize a fresh random context we rely on the OS: -- windows: BCryptGenRandom -- bsd,wasi: arc4random_buf -- linux: getrandom +- Windows : BCryptGenRandom +- osX,bsd,wasi: arc4random_buf +- Linux : getrandom,/dev/urandom If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ @@ -185,9 +185,47 @@ static bool os_random_buf(void* buf, size_t buf_len) { return true; } #elif defined(__linux__) -#include +#include +#include +#include +#include +#include +#include static bool os_random_buf(void* buf, size_t buf_len) { - return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. +#ifdef SYS_getrandom + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static volatile _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_read(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (ret != ENOSYS) return false; + mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + } +#endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = open("/dev/urandom", flags, 0); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + close(fd); + return (count==buf_len); } #else static bool os_random_buf(void* buf, size_t buf_len) { From d4ab0ff08c46bb87ec666e91cecd5b2675388be2 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 7 Jan 2020 14:15:37 -0800 Subject: [PATCH 084/179] fix timeout on huge page reservation if set to 0 --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 90ea2b40..b5d41a1a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -325,7 +325,7 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (numa_count <= 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; - const size_t timeout_per = (timeout_msecs / numa_count) + 50; + const size_t timeout_per = (timeout_msecs==0 ? 
0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { From 50b3f6d7aef19abbe6a985d9be6fa0f7aeb11098 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:58:07 -0800 Subject: [PATCH 085/179] fix assertion --- src/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 3d6a22f5..ee84f755 100644 --- a/src/memory.c +++ b/src/memory.c @@ -308,7 +308,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { // some blocks are still reset mi_assert_internal(!info.is_large); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit); + mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed bool reset_zero = false; From 5d2f111f64a788108466e89797d6ddafde1163f4 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 12:59:20 -0800 Subject: [PATCH 086/179] make the stress test do more iterations under a smaller load to stay under 1GiB committed and increase thread interaction --- test/test-stress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b549e1b4..924dbce1 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -26,8 +26,8 @@ terms of the MIT license. // // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 50; // scaling factor -static int ITER = 10; // N full iterations destructing and re-creating all threads +static int SCALE = 10; // scaling factor +static int ITER = 50; // N full iterations destructing and re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 683d8998d4d56fbb92e447029f36d8ddbfbbf452 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:45:38 -0800 Subject: [PATCH 087/179] fix potential A-B-A problem with segment abandonment; noticed by Manual Poeter and Sam Gross --- include/mimalloc-types.h | 2 +- src/segment.c | 80 ++++++++++++++++++++++++++++------------ test/test-stress.c | 6 +-- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 68529c3f..da9bfbac 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -234,7 +234,7 @@ typedef struct mi_segment_s { // segment fields struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; - volatile _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* abandoned_next; size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) diff --git a/src/segment.c b/src/segment.c index 676df00a..97859fa9 100644 --- a/src/segment.c +++ b/src/segment.c @@ -663,7 +663,28 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; -static volatile _Atomic(uintptr_t) abandoned_count; // = 0; +static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments + +// prepend a list of abandoned segments atomically to the global abandoned list; O(n) +static void mi_segments_prepend_abandoned(mi_segment_t* first) { + if (first == NULL) return; + + // first try if the abandoned list happens to be NULL + if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return; + + // if not, find the end of the list + mi_segment_t* last = first; + while (last->abandoned_next != NULL) { + last = last->abandoned_next; + } + + // and atomically prepend + mi_segment_t* next; + do { + next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); + last->abandoned_next = next; + } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next)); +} static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -679,12 +700,9 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; - mi_segment_t* next; - do { - next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); - mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next)); - mi_atomic_increment(&abandoned_count); + segment->abandoned_next = NULL; + mi_segments_prepend_abandoned(segment); // prepend one-element list + mi_atomic_increment(&abandoned_count); // keep approximate count } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -701,24 +719,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { } bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - uintptr_t reclaimed = 0; - uintptr_t atmost; - if (try_all) { - atmost = abandoned_count+16; // close enough - } - else { - atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) + // To avoid the A-B-A problem, grab the entire list atomically + mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations + if (segment == NULL) return false; + segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL); + if (segment == NULL) return false; + + // we got a non-empty list + if (!try_all) { + // take at most 1/8th of the list and append the rest back to the abandoned list again + // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) + // and probably ok since the length will tend to be not too large. 
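  // For example, the A-B-A hazard that the previous pop-one-segment CAS loop could
  // hit (an illustrative interleaving, assuming an abandoned list A -> B -> C):
  //   1. thread T1 reads the head (A) and intends to CAS it to A->abandoned_next (B)
  //   2. thread T2 reclaims A and B, then abandons A again, so the list is now A -> C
  //   3. T1's CAS still observes A at the head and succeeds, installing B (a segment
  //      that is no longer abandoned) as the new head
  // Taking the entire list with one atomic exchange avoids this: the nodes we hold
  // are private, and pushing the remainder back only ever prepends, so it never
  // dereferences a possibly-recycled next pointer of a shared node.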
+ uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) if (atmost < 8) atmost = 8; // but at least 8 + + // find the split point + mi_segment_t* last = segment; + while (last->abandoned_next != NULL && atmost > 0) { + last = last->abandoned_next; + atmost--; + } + // split the list and push back the remaining segments + mi_segment_t* next = last->abandoned_next; + last->abandoned_next = NULL; + mi_segments_prepend_abandoned(next); } - // for `atmost` `reclaimed` abandoned segments... - while(atmost > reclaimed) { - // try to claim the head of the abandoned segments - mi_segment_t* segment; - do { - segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); - if (segment==NULL) break; // stop early if no more segments available + // reclaim all segments that we kept + while(segment != NULL) { + mi_segment_t* const next = segment->abandoned_next; // save the next segment // got it. mi_atomic_decrement(&abandoned_count); @@ -754,14 +783,17 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen mi_segment_free(segment,false,tld); } else { - reclaimed++; // add its free pages to the the current thread free small segment queue if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { mi_segment_insert_in_free_queue(segment,tld); } } + + // go on + segment = next; } - return (reclaimed>0); + + return true; } diff --git a/test/test-stress.c b/test/test-stress.c index 924dbce1..23137b97 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -135,9 +135,9 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); + data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } #endif } From 940df53b0afc8b114676bf3fd41b9505db2abf0d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 17:51:11 -0800 Subject: [PATCH 088/179] fix iteration count display in stress test --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 23137b97..d295f741 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -209,7 +209,7 @@ int main(int argc, char** argv) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } From 12ef2816ed71be907647a190f4139c6639d49dde Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 19:00:03 -0800 Subject: [PATCH 089/179] fix bug exposed by commit 59fa286 where reclaimed pages could be stuck to NEVER_DELAYED --- include/mimalloc-internal.h | 2 +- src/heap.c | 4 ++-- src/page.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cfbd9782..3042e6f9 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ 
-92,7 +92,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // void _mi_heap_delayed_free(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay); +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); void _mi_deferred_free(mi_heap_t* heap, bool force); diff --git a/src/heap.c b/src/heap.c index 963cb982..5c1f8d38 100644 --- a/src/heap.c +++ b/src/heap.c @@ -103,7 +103,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq UNUSED(arg2); UNUSED(heap); UNUSED(pq); - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } @@ -242,7 +242,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { diff --git a/src/page.c b/src/page.c index 78570ab0..7491bd61 100644 --- a/src/page.c +++ b/src/page.c @@ -119,7 +119,7 @@ bool _mi_page_is_valid(mi_page_t* page) { } #endif -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfree; mi_thread_free_t tfreex; mi_delayed_t old_delay; @@ -133,11 +133,13 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay) { else if (delay == old_delay) { break; // avoid atomic operation if already equal } + else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { + break; // leave never set + } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); } - /* ----------------------------------------------------------- Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ @@ -229,9 +231,12 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(page->heap == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); + mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); _mi_page_free_collect(page,false); mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_push(heap, pq, page); + mi_assert_internal(page->heap != NULL); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) mi_assert_expensive(_mi_page_is_valid(page)); } @@ -308,7 +313,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; mi_heap_t* heap = page->heap; @@ -324,7 +329,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; 
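  // Background on the new `override_never` argument (illustrative summary): pages
  // that are abandoned or whose heap is being destroyed are marked
  // MI_NEVER_DELAYED_FREE so other threads stop pushing onto their delayed-free
  // list; after commit 59fa286 a reclaimed page could keep that sticky state
  // forever. Only _mi_page_reclaim passes override_never=true (moving the page back
  // to MI_NO_DELAYED_FREE once it has a heap again); every other call site, like
  // the two above, passes false and leaves a "never" state untouched.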
mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); From 8f75444e7a07d8a6a56302855ad1094121bd4c90 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:21:32 -0800 Subject: [PATCH 090/179] fix windows debug build at MI_DEBUG=2 --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index 5c1f8d38..4a589e5c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -45,7 +45,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>=3 +#if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); From 403276d11e10bebb1d20c93b210258de3f02d995 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:27:18 -0800 Subject: [PATCH 091/179] build release and debug build on Windows --- azure-pipelines.yml | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 41d67f86..5056ee34 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -13,16 +13,24 @@ jobs: pool: vmImage: windows-2019 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) - task: MSBuild@1 inputs: - solution: build/libmimalloc.sln - - upload: $(Build.SourcesDirectory)/build - artifact: windows + solution: $(BuildType)/libmimalloc.sln + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-windows-$(BuildType) - job: displayName: Linux @@ -75,7 +83,7 @@ jobs: displayName: Ctest - upload: $(Build.SourcesDirectory)/$(BuildType) - artifact: ubuntu-$(BuildType) + artifact: mimalloc-ubuntu-$(BuildType) - job: displayName: macOS @@ -89,4 +97,4 @@ jobs: cmakeArgs: .. 
- script: make -j$(sysctl -n hw.ncpu) -C build - upload: $(Build.SourcesDirectory)/build - artifact: macos + artifact: mimalloc-macos From ce3f327f211418aaaac874a961ea92fe1fb8e013 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:40:57 -0800 Subject: [PATCH 092/179] add test pass to Windows build --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5056ee34..b9376e52 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,6 +29,7 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln + - task: CTest@1 - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 7575b58d7ac4abe84b16c4befefdfe1618ce4347 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:46:56 -0800 Subject: [PATCH 093/179] fix test on Windows in azure pipelines --- azure-pipelines.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b9376e52..9da5ffa5 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,7 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - task: CTest@1 + - displayName: CTest + script: | + cd $(BuildType) + ctest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From 313d4b8ffd1bb741a3f4ab7b883b71e4913c8c5d Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:47:40 -0800 Subject: [PATCH 094/179] fix test on Windows in azure pipelines --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9da5ffa5..ad5f42cb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -29,10 +29,10 @@ jobs: - task: MSBuild@1 inputs: solution: $(BuildType)/libmimalloc.sln - - displayName: CTest - script: | + - script: | cd $(BuildType) ctest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) From be10ebea35652e7cde14c42a8a9ab972efaafb9c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 8 Jan 2020 23:54:56 -0800 Subject: [PATCH 095/179] build debug and secure versions on macOS in Azure pipelines --- azure-pipelines.yml | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ad5f42cb..f88b2e1a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -21,6 +21,9 @@ jobs: Release: BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: @@ -32,7 +35,7 @@ jobs: - script: | cd $(BuildType) ctest - displayName: CTest + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-windows-$(BuildType) @@ -73,19 +76,15 @@ jobs: CXX: clang++ BuildType: secure-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON - steps: - task: CMake@1 inputs: workingDirectory: $(BuildType) cmakeArgs: .. 
$(cmakeExtraArgs) - - script: make -j$(nproc) -C $(BuildType) displayName: Make - - script: make test -C $(BuildType) - displayName: Ctest - + displayName: CTest - upload: $(Build.SourcesDirectory)/$(BuildType) artifact: mimalloc-ubuntu-$(BuildType) @@ -94,11 +93,25 @@ jobs: pool: vmImage: macOS-10.14 + strategy: + matrix: + Debug: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON + Release: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release + Secure: + BuildType: secure + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON steps: - task: CMake@1 inputs: - workingDirectory: 'build' - cmakeArgs: .. - - script: make -j$(sysctl -n hw.ncpu) -C build - - upload: $(Build.SourcesDirectory)/build - artifact: mimalloc-macos + workingDirectory: $(BuildType) + cmakeArgs: .. $(cmakeExtraArgs) + - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType) + displayName: Make + - script: make test -C $(BuildType) + displayName: CTest + - upload: $(Build.SourcesDirectory)/$(BuildType) + artifact: mimalloc-macos-$(BuildType) From 5f61a9e89673c6a361b4b34b4db258181e8e415b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 9 Jan 2020 17:52:28 -0800 Subject: [PATCH 096/179] add mprotect error when the mmap limit might be reached in secure mode (see issue #77) --- src/os.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/os.c b/src/os.c index c9a04d27..b5bd0ad9 100644 --- a/src/os.c +++ b/src/os.c @@ -596,6 +596,18 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } +static void mi_mprotect_hint(int err) { +#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count. For example:\n" + " > sudo sysctl -w vm.max_map_count=262144\n"); + } +#else + UNUSED(err); +#endif +} + // Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) @@ -644,6 +656,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #endif if (err != 0) { _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? 
"commit" : "decommit", start, csize, err); + mi_mprotect_hint(err); } mi_assert_internal(err == 0); return (err == 0); @@ -762,6 +775,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { #endif if (err != 0) { _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + mi_mprotect_hint(err); } return (err == 0); } From 65f4f5144bef1a7145ac95a147ac01c7751a9310 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 13 Jan 2020 17:06:25 -0800 Subject: [PATCH 097/179] fix out-of-bounds error in huge OS page bitmap --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index b5d41a1a..7f1a1caf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,10 +282,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved); + _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages); size_t bcount = mi_block_count_of_size(hsize); - size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS; + size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t)); mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) { @@ -300,11 +300,12 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec arena->is_zero_init = true; arena->is_committed = true; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[bcount]; + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = NULL; // the bitmaps are already zero initialized due to os_alloc // just claim leftover blocks if needed - size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + mi_assert_internal(post >= 0); if (post > 0) { // don't use leftover bits at the end mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); From 941c55ee42e1b3a14b27a1df1ceab3ebfcbcf46d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 14 Jan 2020 21:47:18 -0800 Subject: [PATCH 098/179] wip: first implementation of page free list on segments for effecient delayed page reset --- include/mimalloc-types.h | 2 + src/init.c | 7 +- src/options.c | 2 +- src/segment.c | 307 +++++++++++++++++++++++++++++++-------- 4 files changed, 251 insertions(+), 67 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..51306808 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -417,6 +417,8 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages + mi_page_queue_t small_pages_free; // page queue of free small pages + mi_page_queue_t medium_pages_free; // page queue of free medium pages size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index 3df854cf..085a5011 100644 --- a/src/init.c +++ b/src/init.c @@ -105,9 +105,12 @@ mi_decl_thread mi_heap_t* _mi_heap_default = 
(mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, {NULL ,NULL, 0}, + 0, 0, 0, 0, 0, 0, NULL, + tld_main_stats, tld_main_os + }, // segments { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { MI_STATS_NULL } // stats }; #if MI_INTPTR_SIZE==8 diff --git a/src/options.c b/src/options.c index 0d3bd393..77205713 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output diff --git a/src/segment.c b/src/segment.c index 97859fa9..fb5ea0ec 100644 --- a/src/segment.c +++ b/src/segment.c @@ -43,7 +43,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ ----------------------------------------------------------- */ #if (MI_DEBUG>=3) -static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { +static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; while (list != NULL) { @@ -90,7 +90,7 @@ static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi else return NULL; } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static mi_segment_queue_t* mi_segment_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { return mi_segment_free_queue_of_kind(segment->page_kind, tld); } @@ -113,7 +113,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t ----------------------------------------------------------- */ #if (MI_DEBUG>=2) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); if (in_queue) { @@ -123,7 +123,7 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } #endif -static size_t mi_segment_page_size(mi_segment_t* segment) { +static size_t mi_segment_page_size(const mi_segment_t* segment) { if (segment->capacity > 1) { mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); return ((size_t)1 << segment->page_shift); @@ -134,15 +134,39 @@ static size_t mi_segment_page_size(mi_segment_t* segment) { } } +static mi_page_queue_t* mi_segment_page_free_queue(mi_page_kind_t kind, mi_segments_tld_t* tld) { + if (kind==MI_PAGE_SMALL) return &tld->small_pages_free; + else if (kind==MI_PAGE_MEDIUM) return &tld->medium_pages_free; + else return 
NULL; +} + + #if (MI_DEBUG>=3) -static bool mi_segment_is_valid(mi_segment_t* segment) { +static bool mi_segment_page_free_contains(mi_page_kind_t kind, const mi_page_t* page, mi_segments_tld_t* tld) { + const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); + if (pq == NULL) return false; + mi_page_t* p = pq->first; + while (p != NULL) { + if (p == page) return true; + p = p->next; + } + return false; +} + +static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); size_t nfree = 0; for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + const mi_page_t* const page = &segment->pages[i]; + if (!page->segment_in_use) { + nfree++; + } + else { + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + } } mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 @@ -152,6 +176,20 @@ static bool mi_segment_is_valid(mi_segment_t* segment) { } #endif +static bool mi_segment_page_free_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_kind_t kind = _mi_page_segment(page)->page_kind; + if (page->next != NULL || page->prev != NULL) { + mi_assert_internal(mi_segment_page_free_contains(kind, page, tld)); + return false; + } + if (kind > MI_PAGE_MEDIUM) return true; + // both next and prev are NULL, check for singleton list + const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); + mi_assert_internal(pq!=NULL); + return (pq->first != page && pq->last != page); +} + + /* ----------------------------------------------------------- Guard pages ----------------------------------------------------------- */ @@ -232,6 +270,102 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, } +/* ----------------------------------------------------------- + The free page queue +----------------------------------------------------------- */ + +static void mi_segment_page_free_set_expire(mi_page_t* page) { + *((intptr_t*)(&page->heap)) = _mi_clock_now() + mi_option_get(mi_option_reset_delay); +} + +static mi_msecs_t mi_segment_page_free_get_expire(mi_page_t* page) { + return *((intptr_t*)(&page->heap)); +} + +static void mi_segment_page_free_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(mi_segment_page_free_not_in_queue(page,tld)); + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); + // push on top + mi_segment_page_free_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } +} + +static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + if (segment->page_kind > MI_PAGE_MEDIUM) return; + if (mi_segment_page_free_not_in_queue(page,tld)) return; + + mi_page_queue_t* pq = 
mi_segment_page_free_queue(segment->page_kind, tld); + mi_assert_internal(pq!=NULL); + mi_assert_internal(_mi_page_segment(page)==segment); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(mi_segment_page_free_contains(segment->page_kind, page, tld)); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == pq->last) pq->last = page->prev; + if (page == pq->first) pq->first = page->next; + page->next = page->prev = NULL; + page->heap = NULL; +} + +static void mi_segment_page_free_remove_all(mi_segment_t* segment, mi_segments_tld_t* tld) { + if (segment->page_kind > MI_PAGE_MEDIUM) return; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + mi_segment_page_free_remove(segment, page, tld); + } + } +} + +static mi_page_t* mi_segment_page_free_top(mi_page_kind_t kind, mi_segments_tld_t* tld) { + mi_assert_internal(kind <= MI_PAGE_MEDIUM); + mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); + return pq->first; +} + +static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t kind, mi_segments_tld_t* tld) { + mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); + mi_assert_internal(pq != NULL); + mi_page_t* page = pq->last; + while (page != NULL && (now - mi_segment_page_free_get_expire(page)) >= 0) { + mi_page_t* const prev = page->prev; + mi_page_reset(_mi_page_segment(page), page, 0, tld); + page->heap = NULL; + page->prev = page->next = NULL; + page = prev; + } + pq->last = page; + if (page != NULL){ + page->next = NULL; + } + else { + pq->first = NULL; + } +} + +static void mi_segment_page_free_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_segment_page_free_reset_delayedx(now, MI_PAGE_SMALL, tld); + mi_segment_page_free_reset_delayedx(now, MI_PAGE_MEDIUM, tld); +} + + + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ @@ -407,6 +541,10 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); + mi_assert_internal(tld->small_pages_free.first == NULL); + mi_assert_internal(tld->medium_pages_free.first == NULL); + mi_assert_internal(tld->small_free.first == NULL); + mi_assert_internal(tld->medium_free.first == NULL); } @@ -532,9 +670,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); + UNUSED(force); mi_assert(segment != NULL); + mi_segment_page_free_remove_all(segment, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -561,37 +699,38 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - // set in-use before doing unreset to prevent delayed reset - 
page->segment_in_use = true; - segment->used++; - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - mi_assert_internal(!page->is_reset); - page->is_committed = true; - if (segment->page_kind < MI_PAGE_LARGE || !mi_option_is_enabled(mi_option_eager_page_commit)) { - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); - _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } - } - } - if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? - } - return page; +static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(!page->segment_in_use); + // set in-use before doing unreset to prevent delayed reset + mi_segment_page_free_remove(segment, page, tld); + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); + page->is_committed = true; + if (segment->page_kind < MI_PAGE_LARGE + || !mi_option_is_enabled(mi_option_eager_page_commit)) { + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } } - mi_assert(false); - return NULL; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } + mi_assert_internal(page->segment_in_use); + mi_assert_internal(segment->used <= segment->capacity); + if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { + // if no more free pages, remove from the queue + mi_assert_internal(!mi_segment_has_free(segment)); + mi_segment_remove_from_free_queue(segment, tld); + } } @@ -605,6 +744,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -619,19 +759,27 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // reset the page memory to reduce memory pressure? 
// note: must come after setting `segment_in_use` to false but before block_size becomes 0 - mi_page_reset(segment, page, 0 /*used_size*/, tld); + //mi_page_reset(segment, page, 0 /*used_size*/, tld); - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields and block_size (for page size calculations) + size_t block_size = page->block_size; ptrdiff_t ofs = offsetof(mi_page_t,capacity); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + page->block_size = block_size; segment->used--; + + // add to the free page list for reuse/reset + if (segment->page_kind <= MI_PAGE_MEDIUM) { + mi_segment_page_free_add(segment, page, tld); + } } void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_segment_page_free_reset_delayed(tld); // mark it as free now mi_segment_page_clear(segment, page, tld); @@ -690,10 +838,12 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_segment_remove_from_free_queue(segment,tld); + mi_segment_page_free_reset_delayed(tld); + mi_segment_page_free_remove_all(segment, tld); + mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); // all pages in the segment are abandoned; add it to the abandoned list @@ -708,7 +858,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); @@ -755,7 +906,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned_next = NULL; mi_segments_track_size((long)segment->segment_size,tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); + mi_assert_expensive(mi_segment_is_valid(segment,tld)); _mi_stat_decrease(&tld->stats->segments_abandoned,1); // add its abandoned pages to the current thread @@ -765,6 +916,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -801,30 +953,55 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen Small page allocation ----------------------------------------------------------- */ -// Allocate a small page inside a segment. 
-// Requires that the page has free pages -static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + +static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - mi_page_t* page = mi_segment_find_free(segment, tld); - mi_assert_internal(page->segment_in_use); - mi_assert_internal(segment->used <= segment->capacity); - if (segment->used == segment->capacity) { - // if no more free pages, remove from the queue - mi_assert_internal(!mi_segment_has_free(segment)); - mi_segment_remove_from_free_queue(segment,tld); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use) { + mi_segment_page_claim(segment, page, tld); + return page; + } } - return page; + mi_assert(false); + return NULL; } -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); - if (mi_segment_queue_is_empty(free_queue)) { - mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); - if (segment == NULL) return NULL; - mi_segment_enqueue(free_queue, segment); + +// Allocate a page inside a segment. Requires that the page has free pages +static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { + mi_assert_internal(mi_segment_has_free(segment)); + return mi_segment_find_free(segment, tld); +} + +static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page = NULL; + mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); + if (free_queue->first != NULL && free_queue->first->used < free_queue->first->capacity) { + // prefer to allocate from an available segment + // (to allow more chance of other segments to become completely freed) + page = mi_segment_page_alloc_in(free_queue->first, tld); } - mi_assert_internal(free_queue->first != NULL); - mi_page_t* page = mi_segment_page_alloc_in(free_queue->first,tld); + else { + // otherwise try to pop from the page free list + page = mi_segment_page_free_top(kind, tld); + if (page != NULL) { + mi_segment_page_claim(_mi_page_segment(page), page, tld); + } + else { + // if that failed, find an available segment the segment free queue again + if (mi_segment_queue_is_empty(free_queue)) { + // possibly allocate a fresh segment + mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_enqueue(free_queue, segment); + } + mi_assert_internal(free_queue->first != NULL); + page = mi_segment_page_alloc_in(free_queue->first, tld); + } + } + mi_assert_internal(page != NULL); #if MI_DEBUG>=2 _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif @@ -883,7 +1060,9 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); + mi_segment_page_free_reset_delayed(tld); + mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); return page; } From f92a2a72649b568a7d359f6b05f315c2919bc8c8 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:18:32 -0800 Subject: [PATCH 099/179] add argument pointer to the register output routine --- include/mimalloc.h | 4 ++-- src/options.c | 37 ++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 94d9edfc..08af2eb9 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -111,8 +111,8 @@ mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; -typedef void (mi_output_fun)(const char* msg); -mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; +typedef void (mi_output_fun)(const char* msg, void* arg); +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; diff --git a/src/options.c b/src/options.c index 0d3bd393..ed1237d1 100644 --- a/src/options.c +++ b/src/options.c @@ -140,7 +140,8 @@ void mi_option_disable(mi_option_t option) { } -static void mi_out_stderr(const char* msg) { +static void mi_out_stderr(const char* msg, void* arg) { + UNUSED(arg); #ifdef _WIN32 // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. @@ -160,7 +161,8 @@ static void mi_out_stderr(const char* msg) { static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; -static void mi_out_buf(const char* msg) { +static void mi_out_buf(const char* msg, void* arg) { + UNUSED(arg); if (msg==NULL) return; if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); @@ -175,14 +177,14 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; - out(out_buf); + out(out_buf,arg); if (!no_more_buf) { out_buf[count] = '\n'; // if continue with the buffer, insert a newline } @@ -191,9 +193,9 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg) { - mi_out_stderr(msg); - mi_out_buf(msg); +static void mi_out_buf_stderr(const char* msg, void* arg) { + mi_out_stderr(msg,arg); + mi_out_buf(msg,arg); } @@ -206,21 +208,25 @@ static void mi_out_buf_stderr(const char* msg) { // For now, don't register output from multiple threads. 
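/* [editor's sketch -- not part of the patch] The `void* arg` added to `mi_output_fun`
   and `mi_register_output` in this patch lets a client thread its own state through
   the output callback instead of relying on a global. A minimal hypothetical use
   (the names `my_output` and `logfile` are illustrative only):

     static void my_output(const char* msg, void* arg) {
       fputs(msg, (FILE*)arg);                  // route mimalloc messages to a private log
     }
     // ... at startup, with `logfile` an already opened FILE*:
     mi_register_output(&my_output, logfile);   // `logfile` is passed back as `arg` on every call
*/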
#pragma warning(suppress:4180) static mi_output_fun* volatile mi_out_default; // = NULL +static volatile _Atomic(void*) mi_out_arg; // = NULL -static mi_output_fun* mi_out_get_default(void) { +static mi_output_fun* mi_out_get_default(void** parg) { + if (parg != NULL) { *parg = mi_atomic_read_ptr(&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } -void mi_register_output(mi_output_fun* out) mi_attr_noexcept { +void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now + mi_atomic_write_ptr(&mi_out_arg, arg); + if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } // add stderr to the delayed output after the module is loaded static void mi_add_stderr_output() { - mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr - mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output + mi_assert_internal(mi_out_default == NULL); + mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output } // -------------------------------------------------------- @@ -234,10 +240,11 @@ static mi_decl_thread bool recurse = false; void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { if (recurse) return; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); + void* arg = NULL; + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(&arg); recurse = true; - if (prefix != NULL) out(prefix); - out(message); + if (prefix != NULL) out(prefix,arg); + out(message,arg); recurse = false; return; } From 0956a05bf6fc731e811a8696364caffd5b7e6da3 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:21:32 -0800 Subject: [PATCH 100/179] add argument pointer to the register deferred free callback --- include/mimalloc.h | 4 ++-- src/page.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 08af2eb9..1f6f1ef7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -108,8 +108,8 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); -mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; typedef void (mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; diff --git a/src/page.c b/src/page.c index 7491bd61..6a6e09d6 100644 --- a/src/page.c +++ b/src/page.c @@ -764,18 +764,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { ----------------------------------------------------------- */ static mi_deferred_free_fun* volatile deferred_free = NULL; +static volatile _Atomic(void*) deferred_arg; // = NULL void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; 
if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat); + deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(&deferred_arg)); heap->tld->recurse = false; } } -void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { +void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; + mi_atomic_write_ptr(&deferred_arg, arg); } From 783e3377f79ee82af43a0793910a9f2d01ac7863 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 10:53:54 -0800 Subject: [PATCH 101/179] add output argument to stat printing --- include/mimalloc-internal.h | 4 +- include/mimalloc.h | 5 +- src/init.c | 2 +- src/options.c | 27 +++--- src/stats.c | 160 ++++++++++++++++++------------------ test/main-override-static.c | 4 +- test/test-stress.c | 4 +- 7 files changed, 104 insertions(+), 102 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3042e6f9..d5ce9f59 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -33,8 +33,8 @@ terms of the MIT license. A copy of the license can be found in the file // "options.c" -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message); -void _mi_fprintf(mi_output_fun* out, const char* fmt, ...); +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); diff --git a/include/mimalloc.h b/include/mimalloc.h index 1f6f1ef7..51d96609 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -118,12 +118,13 @@ mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; -mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; -mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; // ------------------------------------------------------------------------------------- diff --git a/src/init.c b/src/init.c index 3df854cf..79e1e044 100644 --- a/src/init.c +++ b/src/init.c @@ -390,7 +390,7 @@ static void mi_process_load(void) { const char* msg = NULL; mi_allocator_init(&msg); if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { - _mi_fputs(NULL,NULL,msg); + _mi_fputs(NULL,NULL,NULL,msg); } } diff --git a/src/options.c b/src/options.c index ed1237d1..017b9d59 100644 --- a/src/options.c +++ b/src/options.c @@ -238,10 +238,11 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT // inside the C runtime causes another message. 
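/* [editor's sketch -- not part of the patch] The deferred-free callback gets the same
   treatment in the preceding patch: whatever `arg` is registered with
   `mi_register_deferred_free` is handed back on every heartbeat, so no global client
   state is needed. A hypothetical example (`collector_t` and `my_deferred_free` are
   illustrative names only):

     typedef struct collector_s { long pending; } collector_t;

     static void my_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
       collector_t* c = (collector_t*)arg;      // client state passed through mimalloc
       if (force || (heartbeat % 64) == 0) {
         c->pending = 0;                        // e.g. release blocks whose free was deferred
       }
     }
     // ... at startup:
     // static collector_t collector;
     // mi_register_deferred_free(&my_deferred_free, &collector);
*/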
static mi_decl_thread bool recurse = false; -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { if (recurse) return; - void* arg = NULL; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(&arg); + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + out = mi_out_get_default(&arg); + } recurse = true; if (prefix != NULL) out(prefix,arg); out(message,arg); @@ -251,21 +252,21 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. -static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { +static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; - _mi_fputs(out,prefix,buf); + _mi_fputs(out,arg,prefix,buf); } -void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { +void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_list args; va_start(args,fmt); - mi_vfprintf(out,NULL,fmt,args); + mi_vfprintf(out,arg,NULL,fmt,args); va_end(args); } @@ -273,7 +274,7 @@ void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -281,7 +282,7 @@ void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -290,7 +291,7 @@ void _mi_error_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); va_end(args); mi_assert(false); } @@ -300,14 +301,14 @@ void _mi_warning_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif @@ -315,7 +316,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) 
{ va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: fatal: ", fmt, args); va_end(args); #if (MI_SECURE>=0) abort(); diff --git a/src/stats.c b/src/stats.c index cb6d8866..57599821 100644 --- a/src/stats.c +++ b/src/stats.c @@ -126,7 +126,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { // unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary -static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); @@ -147,75 +147,75 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const const long frac1 = (long)(tens%10); snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); } - _mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); } -static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { - mi_printf_amount(n,unit,out,NULL); +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + mi_printf_amount(n,unit,out,arg,NULL); } -static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { - if (unit==1) _mi_fprintf(out,"%11s"," "); - else mi_print_amount(n,0,out); +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + else mi_print_amount(n,0,out,arg); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { - _mi_fprintf(out,"%10s:", msg); +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg,"%10s:", msg); if (unit>0) { - mi_print_amount(stat->peak, unit, out); - mi_print_amount(stat->allocated, unit, out); - mi_print_amount(stat->freed, unit, out); - mi_print_amount(unit, 1, out); - mi_print_count(stat->allocated, unit, out); + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->allocated, unit, out, arg); + mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); + _mi_fprintf(out, arg, " not all freed!\n"); else - _mi_fprintf(out, " ok\n"); + _mi_fprintf(out, arg, " ok\n"); } else if (unit<0) { - mi_print_amount(stat->peak, -1, out); - mi_print_amount(stat->allocated, -1, out); - mi_print_amount(stat->freed, -1, out); + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->allocated, -1, out, arg); + mi_print_amount(stat->freed, -1, out, arg); if (unit==-1) { - _mi_fprintf(out, "%22s", ""); + _mi_fprintf(out, arg, "%22s", ""); } else { - mi_print_amount(-unit, 1, out); - mi_print_count((stat->allocated / -unit), 0, out); + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->allocated / -unit), 0, out, arg); } if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); + _mi_fprintf(out, arg, " not all freed!\n"); else - _mi_fprintf(out, " ok\n"); + _mi_fprintf(out, arg, " ok\n"); } else { - mi_print_amount(stat->peak, 1, out); - mi_print_amount(stat->allocated, 1, out); - _mi_fprintf(out, "\n"); + mi_print_amount(stat->peak, 1, out, arg); + 
mi_print_amount(stat->allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n"); } } -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { - _mi_fprintf(out, "%10s:", msg); - mi_print_amount(stat->total, -1, out); - _mi_fprintf(out, "\n"); +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->total, -1, out, arg); + _mi_fprintf(out, arg, "\n"); } -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); - _mi_fprintf(out, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); + _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); } -static void mi_print_header(mi_output_fun* out ) { - _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); +static void mi_print_header(mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); } #if MI_STAT>1 -static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { +static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { @@ -224,14 +224,14 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin int64_t unit = _mi_bin_size((uint8_t)i); snprintf(buf, 64, "%s %3zu", fmt, i); mi_stat_add(all, &bins[i], unit); - mi_stat_print(&bins[i], buf, unit, out); + mi_stat_print(&bins[i], buf, unit, out, arg); } } //snprintf(buf, 64, "%s all", fmt); //mi_stat_print(all, buf, 1); if (found) { - _mi_fprintf(out, "\n"); - mi_print_header(out); + _mi_fprintf(out, arg, "\n"); + mi_print_header(out, arg); } } #endif @@ -239,40 +239,40 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); -static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) mi_attr_noexcept { - mi_print_header(out); +static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_print_header(out,arg); #if MI_STAT>1 mi_stat_count_t normal = { 0,0,0,0 }; - mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); - mi_stat_print(&normal, "normal", 1, out); - mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out); + mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out,arg); + mi_stat_print(&normal, "normal", 1, out, arg); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 
1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); + mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &stats->huge, 1); mi_stat_add(&total, &stats->giant, 1); - mi_stat_print(&total, "total", 1, out); - _mi_fprintf(out, "malloc requested: "); - mi_print_amount(stats->malloc.allocated, 1, out); - _mi_fprintf(out, "\n\n"); + mi_stat_print(&total, "total", 1, out, arg); + _mi_fprintf(out, arg, "malloc requested: "); + mi_print_amount(stats->malloc.allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n\n"); #endif - mi_stat_print(&stats->reserved, "reserved", 1, out); - mi_stat_print(&stats->committed, "committed", 1, out); - mi_stat_print(&stats->reset, "reset", 1, out); - mi_stat_print(&stats->page_committed, "touched", 1, out); - mi_stat_print(&stats->segments, "segments", -1, out); - mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); - mi_stat_print(&stats->segments_cache, "-cached", -1, out); - mi_stat_print(&stats->pages, "pages", -1, out); - mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); - mi_stat_counter_print(&stats->pages_extended, "-extended", out); - mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); - mi_stat_counter_print(&stats->mmap_calls, "mmaps", out); - mi_stat_counter_print(&stats->commit_calls, "commits", out); - mi_stat_print(&stats->threads, "threads", -1, out); - mi_stat_counter_print_avg(&stats->searches, "searches", out); - _mi_fprintf(out, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (elapsed > 0) _mi_fprintf(out, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + mi_stat_print(&stats->reserved, "reserved", 1, out, arg); + mi_stat_print(&stats->committed, "committed", 1, out, arg); + mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_print(&stats->page_committed, "touched", 1, out, arg); + mi_stat_print(&stats->segments, "segments", -1, out, arg); + mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); + mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg); + mi_stat_print(&stats->pages, "pages", -1, out, arg); + mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); + mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); + mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); + mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_print(&stats->threads, "threads", -1, out, arg); + mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); + _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); + if (elapsed > 0) _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); mi_msecs_t user_time; mi_msecs_t sys_time; @@ -281,13 +281,13 @@ static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun size_t page_reclaim; size_t peak_commit; mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out,"%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); - mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); + _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, 
system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); + mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { - _mi_fprintf(out,", commit charge: "); - mi_printf_amount((int64_t)peak_commit, 1, out, "%s"); + _mi_fprintf(out, arg, ", commit charge: "); + mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } - _mi_fprintf(out,"\n"); + _mi_fprintf(out, arg, "\n"); } static mi_msecs_t mi_time_start; // = 0 @@ -319,20 +319,20 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } - -static void mi_stats_print_ex(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out) { - mi_stats_merge_from(stats); - _mi_stats_print(&_mi_stats_main, elapsed, out); +void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { + mi_msecs_t elapsed = _mi_clock_end(mi_time_start); + mi_stats_merge_from(mi_stats_get_default()); + _mi_stats_print(&_mi_stats_main, elapsed, out, arg); } -void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - mi_stats_print_ex(mi_stats_get_default(),elapsed,out); +void mi_stats_print(void* out) mi_attr_noexcept { + // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`) + mi_stats_print_out((mi_output_fun*)out, NULL); } -void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { +void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - _mi_stats_print(mi_stats_get_default(), elapsed, out); + _mi_stats_print(mi_stats_get_default(), elapsed, out, arg); } diff --git a/test/main-override-static.c b/test/main-override-static.c index b04bfeef..54a5ea66 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -13,7 +13,7 @@ static void corrupt_free(); int main() { mi_version(); - + // detect double frees and heap corruption // double_free1(); // double_free2(); @@ -106,4 +106,4 @@ static void corrupt_free() { for (int i = 0; i < 4096; i++) { malloc(SZ); } -} \ No newline at end of file +} diff --git a/test/test-stress.c b/test/test-stress.c index d295f741..42628d7c 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -119,7 +119,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); uintptr_t r = tid * 43; - const size_t max_item_shift = 5; // 128 + const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more size_t retain = allocs / 2; @@ -135,7 +135,7 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size * sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r); } From c9b5ac80b3a22a2456035651afcae1966ce6d3ee Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 12:00:44 -0800 Subject: [PATCH 102/179] update page reset queue to just do delayed page resets --- include/mimalloc-types.h | 3 +- src/init.c | 2 +- src/options.c | 2 +- src/segment.c | 192 ++++++++++++++++++--------------------- 4 files changed, 89 insertions(+), 110 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 
51306808..5d5f6dfc 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -417,8 +417,7 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages - mi_page_queue_t small_pages_free; // page queue of free small pages - mi_page_queue_t medium_pages_free; // page queue of free medium pages + mi_page_queue_t pages_reset; // queue of freed pages that can be reset size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments diff --git a/src/init.c b/src/init.c index 085a5011..debc2517 100644 --- a/src/init.c +++ b/src/init.c @@ -105,7 +105,7 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, {NULL ,NULL, 0}, + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments diff --git a/src/options.c b/src/options.c index 77205713..17e3a836 100644 --- a/src/options.c +++ b/src/options.c @@ -67,7 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index fb5ea0ec..a2cd945c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -134,18 +134,10 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { } } -static mi_page_queue_t* mi_segment_page_free_queue(mi_page_kind_t kind, mi_segments_tld_t* tld) { - if (kind==MI_PAGE_SMALL) return &tld->small_pages_free; - else if (kind==MI_PAGE_MEDIUM) return &tld->medium_pages_free; - else return NULL; -} - #if (MI_DEBUG>=3) -static bool mi_segment_page_free_contains(mi_page_kind_t kind, const mi_page_t* page, mi_segments_tld_t* tld) { - const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); - if (pq == NULL) return false; - mi_page_t* p = pq->first; +static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_t* p = tld->pages_reset.first; while (p != NULL) { if (p == page) return true; p = p->next; @@ -164,8 +156,8 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* if (!page->segment_in_use) { nfree++; } - else { - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + if (page->segment_in_use || page->is_reset) { + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); } } mi_assert_internal(nfree + segment->used == segment->capacity); @@ -176,17 +168,15 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* } #endif -static bool mi_segment_page_free_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { - mi_page_kind_t kind = _mi_page_segment(page)->page_kind; +static bool 
mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { if (page->next != NULL || page->prev != NULL) { - mi_assert_internal(mi_segment_page_free_contains(kind, page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); return false; } - if (kind > MI_PAGE_MEDIUM) return true; - // both next and prev are NULL, check for singleton list - const mi_page_queue_t* const pq = mi_segment_page_free_queue(kind, tld); - mi_assert_internal(pq!=NULL); - return (pq->first != page && pq->last != page); + else { + // both next and prev are NULL, check for singleton list + return (tld->pages_reset.first != page && tld->pages_reset.last != page); + } } @@ -274,44 +264,57 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, The free page queue ----------------------------------------------------------- */ -static void mi_segment_page_free_set_expire(mi_page_t* page) { - *((intptr_t*)(&page->heap)) = _mi_clock_now() + mi_option_get(mi_option_reset_delay); +// we re-use the heap field for the expiration counter. Since this is a +// pointer, it can be 32-bit while the clock is always 64-bit. To guard +// against overflow, we use substraction to check for expiry which work +// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) +static void mi_page_reset_set_expire(mi_page_t* page) { + intptr_t expire = (intptr_t)(_mi_clock_now() + mi_option_get(mi_option_reset_delay)); + page->heap = (mi_heap_t*)expire; } -static mi_msecs_t mi_segment_page_free_get_expire(mi_page_t* page) { - return *((intptr_t*)(&page->heap)); +static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { + intptr_t expire = (intptr_t)(page->heap); + return (((intptr_t)now - expire) >= 0); } -static void mi_segment_page_free_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); +static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(!page->segment_in_use); - mi_assert_internal(_mi_page_segment(page) == segment); - mi_assert_internal(mi_segment_page_free_not_in_queue(page,tld)); - mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); - mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); - // push on top - mi_segment_page_free_set_expire(page); - page->next = pq->first; - page->prev = NULL; - if (pq->first == NULL) { - mi_assert_internal(pq->last == NULL); - pq->first = pq->last = page; + mi_assert_internal(mi_page_not_in_queue(page,tld)); + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_internal(_mi_page_segment(page)==segment); + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + + if (mi_option_get(mi_option_reset_delay) == 0) { + // reset immediately? 
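/* [editor's note -- illustrative sketch, not part of the patch] A zero delay resets a
   page as soon as it becomes free; otherwise the reset is postponed by
   `mi_option_reset_delay` milliseconds (100 by default in this patch). A client could
   tune this through the regular option interface, e.g.:

     mi_option_set(mi_option_reset_delay, 0);   // reset freed pages immediately
     mi_option_disable(mi_option_page_reset);   // or disable page reset altogether

   (equivalently, assuming the usual MIMALLOC_<OPTION> environment mapping, via the
   MIMALLOC_RESET_DELAY and MIMALLOC_PAGE_RESET environment variables). */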
+ mi_page_reset(segment, page, 0, tld); } else { - pq->first->prev = page; - pq->first = page; + // otherwise push on the delayed page reset queue + mi_page_queue_t* pq = &tld->pages_reset; + // push on top + mi_page_reset_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } } } -static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - if (segment->page_kind > MI_PAGE_MEDIUM) return; - if (mi_segment_page_free_not_in_queue(page,tld)) return; +static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { + if (mi_page_not_in_queue(page,tld)) return; - mi_page_queue_t* pq = mi_segment_page_free_queue(segment->page_kind, tld); + mi_page_queue_t* pq = &tld->pages_reset; mi_assert_internal(pq!=NULL); - mi_assert_internal(_mi_page_segment(page)==segment); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_internal(mi_pages_reset_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; @@ -320,33 +323,33 @@ static void mi_segment_page_free_remove(mi_segment_t* segment, mi_page_t* page, page->heap = NULL; } -static void mi_segment_page_free_remove_all(mi_segment_t* segment, mi_segments_tld_t* tld) { - if (segment->page_kind > MI_PAGE_MEDIUM) return; +static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, mi_segments_tld_t* tld) { + if (segment->mem_is_fixed) return; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - mi_segment_page_free_remove(segment, page, tld); + if (!page->segment_in_use && !page->is_reset) { + mi_pages_reset_remove(page, tld); } + else { + mi_assert_internal(mi_page_not_in_queue(page,tld)); + } } } -static mi_page_t* mi_segment_page_free_top(mi_page_kind_t kind, mi_segments_tld_t* tld) { - mi_assert_internal(kind <= MI_PAGE_MEDIUM); - mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); - return pq->first; -} - -static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t kind, mi_segments_tld_t* tld) { - mi_page_queue_t* pq = mi_segment_page_free_queue(kind, tld); - mi_assert_internal(pq != NULL); +static void mi_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_page_queue_t* pq = &tld->pages_reset; + // from oldest up to the first that has not expired yet mi_page_t* page = pq->last; - while (page != NULL && (now - mi_segment_page_free_get_expire(page)) >= 0) { - mi_page_t* const prev = page->prev; + while (page != NULL && mi_page_reset_is_expired(page,now)) { + mi_page_t* const prev = page->prev; // save previous field mi_page_reset(_mi_page_segment(page), page, 0, tld); page->heap = NULL; page->prev = page->next = NULL; page = prev; } + // discard the reset pages from the queue pq->last = page; if (page != NULL){ page->next = NULL; @@ -356,12 +359,6 @@ static void mi_segment_page_free_reset_delayedx(mi_msecs_t now, mi_page_kind_t k } } -static void mi_segment_page_free_reset_delayed(mi_segments_tld_t* tld) { - if (!mi_option_is_enabled(mi_option_page_reset)) return; - mi_msecs_t now = _mi_clock_now(); - 
mi_segment_page_free_reset_delayedx(now, MI_PAGE_SMALL, tld); - mi_segment_page_free_reset_delayedx(now, MI_PAGE_MEDIUM, tld); -} @@ -541,10 +538,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->small_pages_free.first == NULL); - mi_assert_internal(tld->medium_pages_free.first == NULL); - mi_assert_internal(tld->small_free.first == NULL); - mi_assert_internal(tld->medium_free.first == NULL); + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); } @@ -672,7 +667,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); mi_assert(segment != NULL); - mi_segment_page_free_remove_all(segment, tld); + mi_pages_reset_remove_all_in_segment(segment, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -703,7 +698,7 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(_mi_page_segment(page) == segment); mi_assert_internal(!page->segment_in_use); // set in-use before doing unreset to prevent delayed reset - mi_segment_page_free_remove(segment, page, tld); + mi_pages_reset_remove(page, tld); page->segment_in_use = true; segment->used++; if (!page->is_committed) { @@ -744,7 +739,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_assert_internal(mi_page_not_in_queue(page, tld)); size_t inuse = page->capacity * page->block_size; _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -770,7 +765,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg // add to the free page list for reuse/reset if (segment->page_kind <= MI_PAGE_MEDIUM) { - mi_segment_page_free_add(segment, page, tld); + mi_pages_reset_add(segment, page, tld); } } @@ -779,7 +774,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - mi_segment_page_free_reset_delayed(tld); + mi_reset_delayed(tld); // mark it as free now mi_segment_page_clear(segment, page, tld); @@ -841,8 +836,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_segment_page_free_reset_delayed(tld); - mi_segment_page_free_remove_all(segment, tld); + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, tld); // do not force reset on free pages in an abandoned segment, as it is already done in segment_thread_collect mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); @@ -858,7 +853,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); - 
mi_assert_expensive(!mi_segment_page_free_contains(segment->page_kind, page, tld)); + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_expensive(mi_segment_is_valid(segment,tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); @@ -916,7 +911,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_assert_internal(mi_page_not_in_queue(page, tld)); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); @@ -957,7 +952,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); - for (size_t i = 0; i < segment->capacity; i++) { + for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search? mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { mi_segment_page_claim(segment, page, tld); @@ -968,7 +963,6 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* return NULL; } - // Allocate a page inside a segment. Requires that the page has free pages static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); @@ -976,33 +970,19 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl } static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_page_t* page = NULL; + // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); - if (free_queue->first != NULL && free_queue->first->used < free_queue->first->capacity) { - // prefer to allocate from an available segment - // (to allow more chance of other segments to become completely freed) - page = mi_segment_page_alloc_in(free_queue->first, tld); - } - else { - // otherwise try to pop from the page free list - page = mi_segment_page_free_top(kind, tld); - if (page != NULL) { - mi_segment_page_claim(_mi_page_segment(page), page, tld); - } - else { - // if that failed, find an available segment the segment free queue again - if (mi_segment_queue_is_empty(free_queue)) { - // possibly allocate a fresh segment - mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); - if (segment == NULL) return NULL; // return NULL if out-of-memory - mi_segment_enqueue(free_queue, segment); - } - mi_assert_internal(free_queue->first != NULL); - page = mi_segment_page_alloc_in(free_queue->first, tld); - } + if (mi_segment_queue_is_empty(free_queue)) { + // possibly allocate a fresh segment + mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_enqueue(free_queue, segment); } + mi_assert_internal(free_queue->first != NULL); + mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld); mi_assert_internal(page != NULL); #if MI_DEBUG>=2 + // verify it is committed _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; #endif return page; @@ -1062,7 +1042,7 @@ mi_page_t* 
_mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_segment_page_free_reset_delayed(tld); - mi_assert_internal(mi_segment_page_free_not_in_queue(page, tld)); + mi_reset_delayed(tld); + mi_assert_internal(mi_page_not_in_queue(page, tld)); return page; } From 202246425b5c0f2f0dc68a6de9fc9fa6628d7822 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 12:16:01 -0800 Subject: [PATCH 103/179] bump version to 1.4 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- readme.md | 3 +++ test/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index f64948d3..0a982bdf 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 3) +set(mi_version_minor 4) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 51d96609..fe09c7f2 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 130 // major + 2 digits minor +#define MI_MALLOC_VERSION 140 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes diff --git a/readme.md b/readme.md index 9d3974c9..0a096b5e 100644 --- a/readme.md +++ b/readme.md @@ -56,6 +56,9 @@ Enjoy! ### Releases + +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and stronger +free list encoding in secure mode. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. 
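A brief aside on the version encoding touched by this commit: `MI_MALLOC_VERSION` is the major number followed by a two-digit minor, so the bump from 130 to 140 is the move from v1.3 to v1.4. The sketch below is illustrative only and not part of the patch; it assumes the exported `mi_version()` accessor, which returns this constant for the linked library.

// Minimal sketch (illustrative, not part of mimalloc itself): refuse to run
// against a mimalloc older than the v1.4 bump made in the patch above.
// Assumes the exported `mi_version()` accessor returning MI_MALLOC_VERSION.
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  const int v = mi_version();   // e.g. 140 ("major + 2 digits minor") for v1.4
  if (v < 140) {
    fprintf(stderr, "mimalloc too old (version code %d), need at least 1.4\n", v);
    return 1;
  }
  printf("mimalloc version code: %d (major %d)\n", v, v / 100);
  return 0;
}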
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ed204888..4862c0ec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.3 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.4 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From 0099707af905cddaab3d51a5639a1a2ae21ecf3c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:19:01 -0800 Subject: [PATCH 104/179] use delayed free for all pages; reduce size of the page structure for improved address calculation --- include/mimalloc-internal.h | 45 +++++++-- include/mimalloc-types.h | 57 ++++++----- src/alloc.c | 131 +++++++++++++------------ src/heap.c | 67 +++++++------ src/init.c | 11 +-- src/page-queue.c | 44 ++++----- src/page.c | 190 +++++++++++++++--------------------- src/segment.c | 19 ++-- 8 files changed, 296 insertions(+), 268 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d5ce9f59..a9391a40 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -308,7 +308,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - const size_t bsize = page->block_size; + const size_t bsize = page->xblock_size; mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } @@ -318,7 +318,40 @@ static inline mi_page_t* _mi_ptr_page(void* p) { return _mi_segment_page_of(_mi_ptr_segment(p), p); } +// Get the block size of a page (special cased for huge objects) +static inline size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); + return psize; + } +} + // Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_write(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { return (mi_block_t*)(tf & ~0x03); } @@ -338,7 +371,7 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* // are all blocks in a page freed? static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); - return (page->used - page->thread_freed == 0); + return (page->used == 0); } // are there immediately available blocks @@ -349,8 +382,8 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { // are there free blocks in this page? 
static inline bool mi_page_has_free(mi_page_t* page) { mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); - mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); + bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL)); + mi_assert_internal(hasfree || page->used == page->capacity); return hasfree; } @@ -364,7 +397,7 @@ static inline bool mi_page_all_used(mi_page_t* page) { static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed <= frac); + return (page->reserved - page->used <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { @@ -467,7 +500,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { - _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); + _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..bf288d60 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -124,6 +124,9 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// Used as a special value to encode block sizes in 32 bits. +#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) + // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) typedef uintptr_t mi_encoded_t; @@ -136,10 +139,10 @@ typedef struct mi_block_s { // The delayed flags are used for efficient multi-threaded free-ing typedef enum mi_delayed_e { - MI_NO_DELAYED_FREE = 0, - MI_USE_DELAYED_FREE = 1, - MI_DELAYED_FREEING = 2, - MI_NEVER_DELAYED_FREE = 3 + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim } mi_delayed_t; @@ -167,14 +170,28 @@ typedef uintptr_t mi_thread_free_t; // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // -// `used - thread_freed` == actual blocks that are in use (alive) -// `used - thread_freed + |free| + |local_free| == capacity` // -// note: we don't count `freed` (as |free|) instead of `used` to reduce -// the number of memory accesses in the `mi_page_all_free` function(s). 
-// note: the funny layout here is due to:
-// - access is optimized for `mi_free` and `mi_page_alloc`
-// - using `uint16_t` does not seem to slow things down
+// `used - |thread_free|` == actual blocks that are in use (alive)
+// `used - |thread_free| + |free| + |local_free| == capacity`
+//
+// We don't count `freed` (as |free|) but use `used` to reduce
+// the number of memory accesses in the `mi_page_all_free` function(s).
+//
+// Notes:
+// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
+// - Using `uint16_t` does not seem to slow things down
+// - The size is 8 words on 64-bit which helps the page index calculations
+//   (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
+//    and 12 are still good for address calculation)
+// - To limit the structure size, the `xblock_size` is 32-bits only; for
+//   blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
+// - `thread_free` uses the bottom bits as delayed-free flags to optimize
+//   concurrent frees where only the first concurrent free adds to the owning
+//   heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
+//   The invariant is that no-delayed-free is only set if there is
+//   at least one block that will be added, or has already been added, to
+//   the owning heap `thread_delayed_free` list. This guarantees that pages
+//   will be freed correctly even if only other threads free blocks.
 typedef struct mi_page_s {
   // "owned" by the segment
   uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
@@ -194,23 +211,15 @@ typedef struct mi_page_s {
 #ifdef MI_ENCODE_FREELIST
   uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`)
 #endif
-  size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
+  uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
+  uint32_t xblock_size; // size available in each block (always `>0`)
   mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
-  volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free`
-  volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads
-
-  // less accessed info
-  size_t block_size; // size available in each block (always `>0`)
-  mi_heap_t* heap; // the owning heap
+  volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
+  volatile _Atomic(uintptr_t) xheap; // the owning heap (NULL if the page is abandoned)
+
   struct mi_page_s* next; // next page owned by this thread with the same `block_size`
   struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
-
-  // improve page index calculation
-  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words
-  #if (MI_INTPTR_SIZE==4)
-  void* padding[1]; // 12/14 words on 32-bit plain
-  #endif
 } mi_page_t;
diff --git a/src/alloc.c b/src/alloc.c
index bd81aba0..621fb0db 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
 // Fast allocation in a page: just pop from the free list.
 // Fall back to generic allocation only if the list is empty.
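// --------------------------------------------------------------------------
// Editorial sketch, not part of this patch: a self-contained model of the two
// encodings described in the `mi_page_s` notes above. All `demo` names are
// invented for illustration; the real definitions live in mimalloc-types.h
// and mimalloc-internal.h (mi_tf_block, mi_tf_delayed, mi_page_block_size).
// --------------------------------------------------------------------------
#include <stdint.h>
#include <stddef.h>
#include <assert.h>

// (1) The thread-free word: the list head pointer shares one word with a
//     2-bit delayed-free flag, which works because blocks are at least
//     pointer aligned so the two low address bits are always zero.
typedef enum delayed_demo_e {
  DEMO_USE_DELAYED_FREE   = 0,  // first concurrent free goes to the heap's thread_delayed_free list
  DEMO_DELAYED_FREEING    = 1,  // transient: a thread is currently using the owning heap pointer
  DEMO_NO_DELAYED_FREE    = 2,  // later concurrent frees push on the page-local thread free list
  DEMO_NEVER_DELAYED_FREE = 3   // sticky, only reset on page reclaim
} delayed_demo_t;

typedef struct block_demo_s { struct block_demo_s* next; } block_demo_t;

static uintptr_t demo_tf_make(block_demo_t* block, delayed_demo_t flag) {
  assert(((uintptr_t)block & 0x03) == 0);           // low two bits are free for the flag
  return (uintptr_t)block | (uintptr_t)flag;
}
static block_demo_t*  demo_tf_block(uintptr_t tf)   { return (block_demo_t*)(tf & ~(uintptr_t)0x03); }
static delayed_demo_t demo_tf_delayed(uintptr_t tf) { return (delayed_demo_t)(tf & 0x03); }

// (2) The 32-bit `xblock_size`: sizes at or above a sentinel (MI_HUGE_BLOCK_SIZE
//     in the real code) are not stored directly; the effective block size is
//     recovered from the segment page size instead.
#define DEMO_HUGE_BLOCK_SIZE ((uint32_t)1u << 31)   // stand-in sentinel, not the real constant

static size_t demo_page_block_size(uint32_t xblock_size, size_t segment_page_size) {
  return (xblock_size < DEMO_HUGE_BLOCK_SIZE) ? (size_t)xblock_size : segment_page_size;
}

int main(void) {
  block_demo_t b;
  uintptr_t tf = demo_tf_make(&b, DEMO_DELAYED_FREEING);
  assert(demo_tf_block(tf) == &b);
  assert(demo_tf_delayed(tf) == DEMO_DELAYED_FREEING);
  assert(demo_page_block_size(1024, (size_t)4 << 20) == 1024);                               // normal block
  assert(demo_page_block_size(DEMO_HUGE_BLOCK_SIZE, (size_t)4 << 20) == ((size_t)4 << 20));  // huge: fall back to page size
  return 0;
}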
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { - mi_assert_internal(page->block_size==0||page->block_size >= size); + mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { return _mi_malloc_generic(heap, size); // slow path @@ -94,16 +94,16 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED(size); mi_assert_internal(p != NULL); - mi_assert_internal(size > 0 && page->block_size >= size); + mi_assert_internal(size > 0 && mi_page_block_size(page) >= size); mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page))); } else { // otherwise memset - memset(p, 0, page->block_size); + memset(p, 0, mi_page_block_size(page)); } } @@ -141,13 +141,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_tf_block(tf), block)) + mi_list_contains(page, mi_page_thread_free(page), block)) { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; @@ -177,44 +176,50 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Free // ------------------------------------------------------ +// free huge block from another thread +static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + // huge page segments are always abandoned and can be freed immediately + mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + + // claim it and free + mi_heap_t* heap = mi_get_default_heap(); + // paranoia: if this it the last reference, the cas should always succeed + if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats.giant, bsize); + } + else { + _mi_stat_decrease(&tld->stats.huge, bsize); + } + _mi_segment_page_free(page, true, &tld->segments); + } +} + // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { - mi_thread_free_t tfree; - mi_thread_free_t tfreex; - bool use_delayed; - + // huge page segments are always abandoned and can be freed immediately mi_segment_t* 
segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&tld->stats.huge, page->block_size); - } - _mi_segment_page_free(page,true,&tld->segments); - } + mi_free_huge_block_mt(segment, page, block); return; } + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + bool use_delayed; do { - tfree = page->thread_free; - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || - (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page - ); + tfree = mi_atomic_read_relaxed(&page->xthread_free); + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); @@ -224,15 +229,11 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); - if (mi_likely(!use_delayed)) { - // increment the thread free count and return - mi_atomic_increment(&page->thread_freed); - } - else { + if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) @@ -245,10 +246,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // and reset the MI_DELAYED_FREEING flag do { - tfreex = tfree = page->thread_free; - mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } } @@ -257,7 +258,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, page->block_size); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif // and push it on the free list @@ -284,7 +285,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - size_t adjust = (diff % page->block_size); + size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } @@ -329,8 +330,8 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1); + if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1); } // huge page stat is accounted for in `_mi_page_retire` #endif @@ -342,7 +343,9 @@ void mi_free(void* p) mi_attr_noexcept mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -356,13 +359,19 @@ bool _mi_free_delayed_block(mi_block_t* block) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); mi_page_t* page = _mi_segment_page_of(segment, block); - if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) { - // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case don't free it quite yet if - // this is the last block remaining. - if (page->used - page->thread_freed == 1) return false; - } - _mi_free_block(page,true,block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. + // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! 
+ // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + + // collect all other non-local frees to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since used is updated) + _mi_free_block(page, true, block); return true; } @@ -371,7 +380,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); const mi_page_t* page = _mi_segment_page_of(segment,p); - size_t size = page->block_size; + size_t size = mi_page_block_size(page); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/heap.c b/src/heap.c index 4a589e5c..9f2a4457 100644 --- a/src/heap.c +++ b/src/heap.c @@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); count++; if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue @@ -50,7 +50,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(arg1); UNUSED(arg2); UNUSED(pq); - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); mi_assert_expensive(_mi_page_is_valid(page)); @@ -118,13 +118,18 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // this may free some segments (but also take ownership of abandoned pages) _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); } - #if MI_DEBUG - else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + else if ( + #ifdef NDEBUG + collect == FORCE + #else + collect >= FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap)) + { // the main thread is abandoned, try to free all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } - #endif } // if abandoning, mark all pages to no longer add to delayed_free @@ -245,25 +250,27 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_LARGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.giant, bsize); } else { - _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); } } - #if (MI_STAT>1) - size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); +#if (MI_STAT>1) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse); } - mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... - #endif + mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... +#endif - // pretend it is all free now - mi_assert_internal(page->thread_freed<=0xFFFF); - page->used = (uint16_t)page->thread_freed; + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; // and free the page _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); @@ -374,7 +381,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); if (mi_unlikely(!valid)) return NULL; - return _mi_segment_page_of(segment,p)->heap; + return mi_page_heap(_mi_segment_page_of(segment,p)); } bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { @@ -390,7 +397,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); - void* end = (uint8_t*)start + (page->capacity * page->block_size); + void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found } @@ -432,13 +439,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; + const size_t bsize = mi_page_block_size(page); size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(page->heap, area, pstart, page->block_size, arg); + return visitor(mi_page_heap(page), area, pstart, bsize, arg); } // create a bitmap of free blocks. @@ -451,8 +459,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v free_count++; mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; - mi_assert_internal(offset % page->block_size == 0); - size_t blockidx = offset / page->block_size; // Todo: avoid division? 
+ mi_assert_internal(offset % bsize == 0); + size_t blockidx = offset / bsize; // Todo: avoid division? mi_assert_internal( blockidx < MI_MAX_BLOCKS); size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); @@ -471,8 +479,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v } else if ((m & ((uintptr_t)1 << bit)) == 0) { used_count++; - uint8_t* block = pstart + (i * page->block_size); - if (!visitor(page->heap, area, block, page->block_size, arg)) return false; + uint8_t* block = pstart + (i * bsize); + if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -487,12 +495,13 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; + const size_t bsize = mi_page_block_size(page); xarea.page = page; - xarea.area.reserved = page->reserved * page->block_size; - xarea.area.committed = page->capacity * page->block_size; + xarea.area.reserved = page->reserved * bsize; + xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used - page->thread_freed; // race is ok - xarea.area.block_size = page->block_size; + xarea.area.used = page->used; + xarea.area.block_size = bsize; return fun(heap, &xarea, arg); } diff --git a/src/init.c b/src/init.c index 79e1e044..d81d7459 100644 --- a/src/init.c +++ b/src/init.c @@ -23,12 +23,11 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, #endif 0, // used - NULL, - ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), - 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } // padding - #endif + 0, // xblock_size + NULL, // local_free + ATOMIC_VAR_INIT(0), // xthread_free + ATOMIC_VAR_INIT(0), // xheap + NULL, NULL }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page-queue.c b/src/page-queue.c index 95443a69..68e2aaa4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -178,20 +178,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); - mi_heap_t* heap = page->heap; + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); + mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + uint8_t bin = (mi_page_is_in_full(page) ? 
MI_BIN_FULL : _mi_bin(page->xblock_size)); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); + mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); return pq; } @@ -246,35 +246,35 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; if (page == queue->first) { queue->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_heap_queue_first_update(heap,queue); } - page->heap->page_count--; + heap->page_count--; page->next = NULL; page->prev = NULL; - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + mi_assert_internal(page->xblock_size == queue->block_size || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -296,19 +296,19 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || - (page->block_size == to->block_size && mi_page_queue_is_full(from)) || - (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || + (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || + (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->xblock_size > 
MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; if (page == from->first) { from->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_heap_queue_first_update(heap, from); } @@ -316,14 +316,14 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro page->prev = to->last; page->next = NULL; if (to->last != NULL) { - mi_assert_internal(page->heap == to->last->heap); + mi_assert_internal(heap == mi_page_heap(to->last)); to->last->next = page; to->last = page; } else { to->first = page; to->last = page; - mi_heap_queue_first_update(page->heap, to); + mi_heap_queue_first_update(heap, to); } mi_page_set_in_full(page, mi_page_queue_is_full(to)); @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + mi_page_set_heap(page,heap); count++; } diff --git a/src/page.c b/src/page.c index 6a6e09d6..40aec0c6 100644 --- a/src/page.c +++ b/src/page.c @@ -29,10 +29,11 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ // Index a block in a page -static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); - return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); @@ -69,13 +70,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { } static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->block_size > 0); + mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); + const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -89,10 +91,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif - mi_block_t* tfree = mi_tf_block(page->thread_free); + mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); - size_t tfree_count = mi_page_list_count(page, tfree); - mi_assert_internal(tfree_count <= page->thread_freed + 1); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == 
page->capacity); @@ -105,14 +107,14 @@ bool _mi_page_is_valid(mi_page_t* page) { #if MI_SECURE mi_assert_internal(page->key != 0); #endif - if (page->heap!=NULL) { + if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); if (segment->page_kind != MI_PAGE_HUGE) { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); - mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } return true; @@ -124,20 +126,20 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_thread_free_t tfreex; mi_delayed_t old_delay; do { - tfree = mi_atomic_read_relaxed(&page->thread_free); + tfree = mi_atomic_read(&page->xthread_free); tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { - mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + // mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal } else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { - break; // leave never set + break; // leave never-delayed flag set } - } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } /* ----------------------------------------------------------- @@ -154,17 +156,17 @@ static void _mi_page_thread_free_collect(mi_page_t* page) mi_thread_free_t tfree; mi_thread_free_t tfreex; do { - tfree = page->thread_free; + tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); // return if the list is empty if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uintptr_t max_count = page->capacity; // cannot collect more than capacity - uintptr_t count = 1; + uint32_t max_count = page->capacity; // cannot collect more than capacity + uint32_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -182,7 +184,6 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -190,7 +191,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_assert_internal(page!=NULL); // collect the thread free list - if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation + if (force || mi_page_thread_free(page) != NULL) { 
// quick test to avoid an atomic operation _mi_page_thread_free_collect(page); } @@ -228,15 +229,16 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); - _mi_page_free_collect(page,false); - mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); + mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, heap); + mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); - mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim + mi_assert_internal(mi_page_heap(page)!= NULL); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -270,8 +272,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { // otherwise allocate the page page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; - mi_assert_internal(pq->block_size==page->block_size); - mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); return page; } @@ -312,11 +314,9 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; - mi_heap_t* heap = page->heap; + mi_heap_t* heap = mi_page_heap(page); mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); @@ -329,10 +329,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; - - mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); + mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } @@ -345,18 +343,17 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(page->heap != NULL); + mi_assert_internal(mi_page_heap(page) != NULL); -#if MI_DEBUG > 1 - mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); -#endif + mi_heap_t* pheap = mi_page_heap(page); // remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = 
&pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, NULL); #if MI_DEBUG>1 // check there are no references left.. @@ -366,7 +363,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #endif // and abandon it - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); _mi_segment_page_abandon(page,segments_tld); } @@ -377,33 +374,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(mi_page_all_free(page)); - #if MI_DEBUG>1 - // check if we can safely free - mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE); - free = mi_atomic_exchange(&page->thread_free, free); - mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING); - #endif + mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + // no more aligned blocks in here mi_page_set_has_aligned(page, false); - // account for huge pages here - // (note: no longer necessary as huge pages are always abandoned) - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); - } - } - // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; mi_page_queue_remove(pq, page); // and free it - mi_assert_internal(page->heap == NULL); + mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } @@ -427,7 +409,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? 
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 4; @@ -469,15 +451,15 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -491,7 +473,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); counts[i] = slice_extend; } counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) @@ -526,7 +508,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co page->free = free_start; } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE <= 2) @@ -534,12 +516,13 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - const size_t bsize = page->block_size; - mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); @@ -581,8 +564,9 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count + const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); size_t extend = page->reserved - page->capacity; - size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? 
MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; if (extend > max_extend) { @@ -596,20 +580,20 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // commit on-demand for large and huge pages? if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - uint8_t* start = page_start + (page->capacity * page->block_size); - _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + uint8_t* start = page_start + (page->capacity * bsize); + _mi_mem_commit(start, extend * bsize, NULL, &tld->os); } // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, &tld->stats ); + mi_page_free_list_extend(page, bsize, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); + mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(tld->stats.page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * bsize); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -625,9 +609,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert(segment != NULL); mi_assert_internal(block_size > 0); // set fields + mi_page_set_heap(page, heap); size_t page_size; _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - page->block_size = block_size; + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST @@ -639,14 +624,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); - mi_assert_internal(page->thread_free == 0); - mi_assert_internal(page->thread_freed == 0); + mi_assert_internal(page->xthread_free == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->key != 0); + mi_assert_internal(page->key[1] != 0); + mi_assert_internal(page->key[2] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); @@ -664,34 +649,19 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; - while( page != NULL) + while (page != NULL) { mi_page_t* next = page->next; // remember next count++; // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page,false); + _mi_page_free_collect(page, false); // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. 
- // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. Try to extend @@ -704,20 +674,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 3. If the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); - mi_page_to_full(page,pq); + mi_page_to_full(page, pq); page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches,count); - - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } + mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { page = mi_page_fresh(heap, pq); @@ -729,11 +691,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - _mi_heap_collect_retired(heap,false); + _mi_heap_collect_retired(heap, false); return page; } + // Find a page with free blocks of `size`. static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); @@ -794,14 +757,15 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { + const size_t bsize = mi_page_block_size(page); mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size == block_size); + mi_assert_internal(bsize >= size); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_page_set_heap(page, NULL); - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); } @@ -849,7 +813,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept if (page == NULL) return NULL; // out of memory mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size >= size); + mi_assert_internal(mi_page_block_size(page) >= size); // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); diff --git a/src/segment.c b/src/segment.c index 97859fa9..4fb3e28b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -208,8 +208,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m mi_assert_internal(size <= psize); size_t reset_size = (size == 0 || size > psize ? 
psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - reset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + reset_size = page->capacity * mi_page_block_size(page); } _mi_mem_reset(start, reset_size, tld->os); } @@ -223,8 +223,8 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - unreset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + unreset_size = page->capacity * mi_page_block_size(page); } bool is_zero = false; _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); @@ -255,7 +255,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -278,7 +278,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -605,7 +605,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - size_t inuse = page->capacity * page->block_size; + size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -707,6 +707,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); segment->abandoned++; @@ -765,9 +767,12 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free by now, free the page mi_segment_page_clear(segment,page,tld); From ad32eb1dfb2b73ed8eaecfdc14e01cbbf43d05b2 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:57:19 -0800 Subject: [PATCH 105/179] eager collect on page reclamation --- src/page.c | 10 +++------- src/segment.c | 7 +++++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/page.c b/src/page.c index 40aec0c6..02f10238 
100644 --- a/src/page.c +++ b/src/page.c @@ -229,16 +229,12 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(mi_page_heap(page) == NULL); + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); - mi_page_set_heap(page, heap); mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); - mi_page_queue_push(heap, pq, page); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim - mi_assert_internal(mi_page_heap(page)!= NULL); + mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } diff --git a/src/segment.c b/src/segment.c index 4fb3e28b..d27a7c13 100644 --- a/src/segment.c +++ b/src/segment.c @@ -772,13 +772,16 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + // set the heap again and allow delayed free again + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { - // if everything free by now, free the page + // if everything free already, clear the page directly mi_segment_page_clear(segment,page,tld); } else { - // otherwise reclaim it + // otherwise reclaim it into the heap _mi_page_reclaim(heap,page); } } From 9629d731888f64db99e43016c916268a73a5f02f Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 18:07:29 -0800 Subject: [PATCH 106/179] fix options --- src/options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/options.c b/src/options.c index ce21309d..f1d8205f 100644 --- a/src/options.c +++ b/src/options.c @@ -67,6 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds From b8072aaacb581b9655545b9960456c239b7c59af Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 03:54:51 -0800 Subject: [PATCH 107/179] fix debug build --- src/heap.c | 34 +++++++++++++++++----------------- src/segment.c | 7 +++++-- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/heap.c b/src/heap.c index 9f2a4457..12aa0840 100644 --- a/src/heap.c +++ b/src/heap.c @@ -56,7 +56,8 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ mi_assert_expensive(_mi_page_is_valid(page)); return true; } - +#endif +#if MI_DEBUG>=3 static bool 
mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); @@ -111,7 +112,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (!mi_heap_is_initialized(heap)) return; _mi_deferred_free(heap, collect > NORMAL); - + // collect (some) abandoned pages if (collect >= NORMAL && !heap->no_reclaim) { if (collect == NORMAL) { @@ -123,8 +124,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) collect == FORCE #else collect >= FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap)) + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap)) { // the main thread is abandoned, try to free all abandoned segments. // if all memory is freed by now, all segments should be freed. @@ -135,19 +136,19 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // if abandoning, mark all pages to no longer add to delayed_free if (collect == ABANDON) { //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) { - // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE - //} + // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE + //} mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } - // free thread delayed blocks. + // free thread delayed blocks. // (if abandoning, after this there are no more local references into the pages.) _mi_heap_delayed_free(heap); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL ); - + // collect segment caches if (collect >= FORCE) { _mi_segment_thread_collect(&heap->tld->segments); @@ -177,7 +178,7 @@ void mi_collect(bool force) mi_attr_noexcept { ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { - mi_thread_init(); + mi_thread_init(); return mi_get_default_heap(); } @@ -198,7 +199,7 @@ mi_heap_t* mi_heap_new(void) { heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe @@ -226,7 +227,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { static void mi_heap_free(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); if (mi_heap_is_backing(heap)) return; // dont free the backing heap - + // reset default if (mi_heap_is_default(heap)) { _mi_heap_set_default_direct(heap->tld->heap_backing); @@ -247,7 +248,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats const size_t bsize = mi_page_block_size(page); @@ -311,7 +312,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { if (from==NULL || from->page_count == 0) return; // unfull all full pages in the `from` heap - mi_page_t* page = from->pages[MI_BIN_FULL].first; + mi_page_t* page = from->pages[MI_BIN_FULL].first; while (page != NULL) { mi_page_t* next = page->next; _mi_page_unfull(page); @@ 
-323,7 +324,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { _mi_heap_delayed_free(from); // transfer all pages by appending the queues; this will set - // a new heap field which is ok as all pages are unfull'd and thus + // a new heap field which is ok as all pages are unfull'd and thus // other threads won't access this field anymore (see `mi_free_block_mt`) for (size_t i = 0; i < MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; @@ -334,7 +335,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { } mi_assert_internal(from->thread_delayed_free == NULL); mi_assert_internal(from->page_count == 0); - + // and reset the `from` heap mi_heap_reset_pages(from); } @@ -362,7 +363,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (!mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -534,4 +535,3 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_vis mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); } - diff --git a/src/segment.c b/src/segment.c index ee1de005..3f99c790 100644 --- a/src/segment.c +++ b/src/segment.c @@ -135,7 +135,7 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { } -#if (MI_DEBUG>=3) +#if (MI_DEBUG>=2) static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { mi_page_t* p = tld->pages_reset.first; while (p != NULL) { @@ -144,7 +144,9 @@ static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tl } return false; } +#endif +#if (MI_DEBUG>=3) static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); @@ -169,6 +171,7 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* #endif static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page != NULL); if (page->next != NULL || page->prev != NULL) { mi_assert_internal(mi_pages_reset_contains(page, tld)); return false; @@ -1052,6 +1055,6 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_ mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); mi_reset_delayed(tld); - mi_assert_internal(mi_page_not_in_queue(page, tld)); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } From 8d8f355ed0190702edcce7d16d9fdad7466ae2b7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 11:25:02 -0800 Subject: [PATCH 108/179] add option to reset eagerly when a segment is abandoned --- include/mimalloc.h | 1 + src/options.c | 1 + src/segment.c | 19 ++++++++++++------- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index fe09c7f2..e45b7e4d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -273,6 +273,7 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, + mi_option_abandoned_page_reset, mi_option_segment_reset, mi_option_eager_commit_delay, mi_option_reset_delay, diff --git a/src/options.c b/src/options.c index f1d8205f..c12c77e0 100644 --- a/src/options.c +++ b/src/options.c @@ -68,6 +68,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds diff --git a/src/segment.c b/src/segment.c index 3f99c790..ea030d7a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -326,12 +326,15 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { page->used = 0; } -static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, mi_segments_tld_t* tld) { - if (segment->mem_is_fixed) return; +static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) { + if (segment->mem_is_fixed) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use && !page->is_reset) { mi_pages_reset_remove(page, tld); + if (force_reset) { + mi_page_reset(segment, page, 0, tld); + } } else { mi_assert_internal(mi_page_not_in_queue(page,tld)); @@ -668,9 +671,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); - mi_assert(segment != NULL); - mi_pages_reset_remove_all_in_segment(segment, tld); + UNUSED(force); + mi_assert(segment != NULL); + // note: don't reset pages even on abandon as the whole segment is freed? 
(and ready for reuse) + bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset)); + mi_pages_reset_remove_all_in_segment(segment, force_reset, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); @@ -840,8 +845,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_expensive(mi_segment_is_valid(segment,tld)); // remove the segment from the free page queue if needed - mi_reset_delayed(tld); - mi_pages_reset_remove_all_in_segment(segment, tld); // do not force reset on free pages in an abandoned segment, as it is already done in segment_thread_collect + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); From 4e91eab8fca9dfa95f74a7205f8f216dd9f22f02 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 14:12:37 -0800 Subject: [PATCH 109/179] specialize mi_mallocn for count=1 --- src/alloc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index 621fb0db..be63f86a 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -443,7 +443,12 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { // Uninitialized `calloc` extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count,size,&total)) return NULL; + if (count==1) { + total = size; + } + else if (mi_mul_overflow(count, size, &total)) { + return NULL; + } return mi_heap_malloc(heap, total); } From 24f8bcbc8f4236b2bd37b1c8bfc169ec9a941942 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 14:25:09 -0800 Subject: [PATCH 110/179] add explicit calling convention to registered functions --- include/mimalloc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index e45b7e4d..de4282da 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -108,10 +108,10 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; -typedef void (mi_output_fun)(const char* msg, void* arg); +typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept; From 1b2b7404f7770022ec806a294fa35e145cb93849 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Thu, 16 Jan 2020 14:54:13 -0800 Subject: [PATCH 111/179] flip the order of includes for ease of use --- include/mimalloc-stl-allocator.h | 1 - include/mimalloc.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mimalloc-stl-allocator.h b/include/mimalloc-stl-allocator.h index 11ba30fb..a98e398b 100644 --- a/include/mimalloc-stl-allocator.h +++ b/include/mimalloc-stl-allocator.h @@ -7,7 +7,6 @@ This header can be used to hook mimalloc 
into STL containers in place of std::allocator. -----------------------------------------------------------------------------*/ -#include #include // true_type #pragma warning(disable: 4100) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7f26896c..988a080d 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -73,6 +73,8 @@ terms of the MIT license. A copy of the license can be found in the file #include // bool #ifdef __cplusplus +#include + extern "C" { #endif From 526bee6843e2d80a57671f68115c504138791cd0 Mon Sep 17 00:00:00 2001 From: Kirsten Lee Date: Thu, 16 Jan 2020 15:17:15 -0800 Subject: [PATCH 112/179] merge stl and main header --- CMakeLists.txt | 1 - ide/vs2017/mimalloc-override.vcxproj | 1 - ide/vs2017/mimalloc.vcxproj | 1 - ide/vs2019/mimalloc-override.vcxproj | 1 - ide/vs2019/mimalloc.vcxproj | 1 - include/mimalloc-stl-allocator.h | 43 ---------------------------- include/mimalloc.h | 38 +++++++++++++++++++++++- test/test-api.c | 1 - 8 files changed, 37 insertions(+), 50 deletions(-) delete mode 100644 include/mimalloc-stl-allocator.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 93560951..467fad95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,7 +187,6 @@ install(TARGETS mimalloc-static EXPORT mimalloc DESTINATION ${mi_install_dir}) install(FILES include/mimalloc.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-override.h DESTINATION ${mi_install_dir}/include) install(FILES include/mimalloc-new-delete.h DESTINATION ${mi_install_dir}/include) -install(FILES include/mimalloc-stl-allocator.h DESTINATION ${mi_install_dir}/include) install(FILES cmake/mimalloc-config.cmake DESTINATION ${mi_install_dir}/cmake) install(FILES cmake/mimalloc-config-version.cmake DESTINATION ${mi_install_dir}/cmake) install(EXPORT mimalloc DESTINATION ${mi_install_dir}/cmake) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index e0a6d85b..863195a3 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -214,7 +214,6 @@ - diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index ff6c8edb..064a13dc 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -239,7 +239,6 @@ - diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index e6416e05..950a0a1a 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -214,7 +214,6 @@ - diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index ffede6ca..17adc958 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -239,7 +239,6 @@ - diff --git a/include/mimalloc-stl-allocator.h b/include/mimalloc-stl-allocator.h deleted file mode 100644 index a98e398b..00000000 --- a/include/mimalloc-stl-allocator.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#ifndef MIMALLOC_STL_ALLOCATOR_H -#define MIMALLOC_STL_ALLOCATOR_H - -#ifdef __cplusplus -/* ---------------------------------------------------------------------------- -This header can be used to hook mimalloc into STL containers in place of -std::allocator. 
------------------------------------------------------------------------------*/ -#include // true_type - -#pragma warning(disable: 4100) - -template -struct mi_stl_allocator { - typedef T value_type; - - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - - mi_stl_allocator() noexcept {} - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - template - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - - T* allocate(size_t n, const void* hint = 0) { - return (T*)mi_mallocn(n, sizeof(T)); - } - - void deallocate(T* p, size_t n) { - mi_free(p); - } -}; - -template -bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } -template -bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } - -#endif // __cplusplus -#endif // MIMALLOC_STL_ALLOCATOR_H diff --git a/include/mimalloc.h b/include/mimalloc.h index 988a080d..e664b668 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -73,7 +73,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // bool #ifdef __cplusplus -#include +#include // true_type extern "C" { #endif @@ -328,5 +328,41 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_ } #endif +#ifdef __cplusplus + +// ------------------------------------------------------ +// STL allocator - an extension to hook mimalloc into STL +// containers in place of std::allocator. +// ------------------------------------------------------ + +#pragma warning(disable: 4100) +template +struct mi_stl_allocator { + typedef T value_type; + + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + using is_always_equal = std::true_type; + + mi_stl_allocator() noexcept {} + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + template + mi_stl_allocator(const mi_stl_allocator& other) noexcept {} + + T* allocate(size_t n, const void* hint = 0) { + return (T*)mi_mallocn(n, sizeof(T)); + } + + void deallocate(T* p, size_t n) { + mi_free(p); + } +}; + +template +bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } +template +bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } +#endif #endif diff --git a/test/test-api.c b/test/test-api.c index f93884d0..060efc44 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -32,7 +32,6 @@ we therefore test the API over various inputs. 
Please add more tests :-) #include "mimalloc.h" #include "mimalloc-internal.h" -#include "mimalloc-stl-allocator.h" // --------------------------------------------------------------------------- // Test macros: CHECK(name,predicate) and CHECK_BODY(name,body) From 7a9502973d4c20bd5ac962a9b6e5869494990025 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 15:57:11 -0800 Subject: [PATCH 113/179] rearrange STL allocator code: remove pragma, ifdef for C++11 --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 5 ++++ include/mimalloc.h | 46 +++++++++++++++---------------------- src/alloc.c | 7 +----- 4 files changed, 25 insertions(+), 35 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index f59de292..037e380d 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level2 + Level4 Disabled true true diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index a9391a40..500764ed 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -167,6 +167,11 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + // quick check for the case where count is one (common for C++ allocators) + if (count==1) { + *total = size; + return false; + } #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 #include // UINT_MAX, ULONG_MAX #if (SIZE_MAX == UINT_MAX) diff --git a/include/mimalloc.h b/include/mimalloc.h index 29481c80..4c5b0cad 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -73,8 +73,6 @@ terms of the MIT license. A copy of the license can be found in the file #include // bool #ifdef __cplusplus -#include // true_type - extern "C" { #endif @@ -337,41 +335,33 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_ } #endif +// --------------------------------------------------------------------------------------------- +// Implement the C++ std::allocator interface for use in STL containers. +// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally) +// --------------------------------------------------------------------------------------------- #ifdef __cplusplus -// ------------------------------------------------------ -// STL allocator - an extension to hook mimalloc into STL -// containers in place of std::allocator. 
-// ------------------------------------------------------ +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#include // true_type +#endif -#pragma warning(disable: 4100) -template -struct mi_stl_allocator { +template struct mi_stl_allocator { typedef T value_type; - +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; - - mi_stl_allocator() noexcept {} - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - template - mi_stl_allocator(const mi_stl_allocator& other) noexcept {} - - T* allocate(size_t n, const void* hint = 0) { - return (T*)mi_mallocn(n, sizeof(T)); - } - - void deallocate(T* p, size_t n) { - mi_free(p); - } +#endif + mi_stl_allocator() mi_attr_noexcept {} + mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } + template mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } + T* allocate(size_t n, const void* hint = 0) { (void)hint; return (T*)mi_mallocn(n, sizeof(T)); } + void deallocate(T* p, size_t n) { mi_free_size(p,n); } }; -template -bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return true; } -template -bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) noexcept { return false; } -#endif +template bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return true; } +template bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return false; } +#endif // __cplusplus #endif diff --git a/src/alloc.c b/src/alloc.c index be63f86a..d66c629b 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -443,12 +443,7 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { // Uninitialized `calloc` extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (count==1) { - total = size; - } - else if (mi_mul_overflow(count, size, &total)) { - return NULL; - } + if (mi_mul_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } From a0bee081dde0ed7ef102f484c1436f9dd292522b Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 15:58:48 -0800 Subject: [PATCH 114/179] use proper C++11 check for noexcept attribute --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 037e380d..f59de292 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level4 + Level2 Disabled true true diff --git a/include/mimalloc.h b/include/mimalloc.h index 4c5b0cad..59f394a7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -15,10 +15,10 @@ terms of the MIT license. 
A copy of the license can be found in the file // ------------------------------------------------------ #ifdef __cplusplus - #if (__GNUC__ <= 5) || (_MSC_VER <= 1900) - #define mi_attr_noexcept throw() - #else + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #define mi_attr_noexcept noexcept + #else + #define mi_attr_noexcept throw() #endif #else #define mi_attr_noexcept From 3447debf26e41e25e2f18908ebeeb3b99ca93fa0 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:08:23 -0800 Subject: [PATCH 115/179] add Linux gcc C++ build to azure pipeline --- azure-pipelines.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f88b2e1a..844a4d08 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -61,6 +61,11 @@ jobs: CXX: g++ BuildType: secure cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + Debug++: + CC: gcc + CXX: g++ + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON Debug Clang: CC: clang CXX: clang++ From 7a98a461a333b195b8ab090484a4e4be4fcd05fb Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:21:59 -0800 Subject: [PATCH 116/179] fix type of bitmap field to avoid C++ error on older gcc --- src/bitmap.inc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c index 11ada472..c3813a44 100644 --- a/src/bitmap.inc.c +++ b/src/bitmap.inc.c @@ -113,7 +113,7 @@ static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_f mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); - mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); if ((field & mask) == 0) { // free? if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { // claimed! @@ -221,7 +221,7 @@ static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_field const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const uintptr_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]); + uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); if (any_ones != NULL) *any_ones = ((field & mask) != 0); return ((field & mask) == mask); } From da709bcf110c74356ba5f0fc3267e7ccb47bf91c Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:30:44 -0800 Subject: [PATCH 117/179] test for gcc version for aligned new/delete overrides --- src/alloc-override.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index 002374bb..89c5126a 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -98,7 +98,7 @@ terms of the MIT license. 
A copy of the license can be found in the file void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n); #endif - #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) + #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast(al)); }; From 3f8b8b7a55b2113f75c9eb8173841f1d9c0de676 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:45:34 -0800 Subject: [PATCH 118/179] initialize thread_delayed_free field atomically --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index d99eeb40..b8422c2f 100644 --- a/src/init.c +++ b/src/init.c @@ -122,7 +122,7 @@ mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - NULL, + ATOMIC_VAR_INIT(NULL), 0, // thread id MI_INIT_COOKIE, // initial cookie { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) From 5c82b459d6401dec4b92b4ae81248f35ca3fa1fb Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:49:59 -0800 Subject: [PATCH 119/179] enable debug clang++ compilation in azure pipelines --- azure-pipelines.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 844a4d08..8ff0b629 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -81,6 +81,11 @@ jobs: CXX: clang++ BuildType: secure-clang cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON + Debug++ Clang: + CC: clang + CXX: clang++ + BuildType: debug-clang + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON steps: - task: CMake@1 inputs: From f750e793ff60c6353d50ae106579d9317a17a54f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:52:44 -0800 Subject: [PATCH 120/179] ensure unique build names for C++ azure pipeline jobs --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8ff0b629..6c7bad96 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -64,7 +64,7 @@ jobs: Debug++: CC: gcc CXX: g++ - BuildType: debug + BuildType: debug-cxx cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON Debug Clang: CC: clang @@ -84,7 +84,7 @@ jobs: Debug++ Clang: CC: clang CXX: clang++ - BuildType: debug-clang + BuildType: debug-clang-cxx cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON steps: - task: CMake@1 From c6037ac8f2c92cc07248051e58e88bf3aac7cf05 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 16:56:02 -0800 Subject: [PATCH 121/179] fix assertion index --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 02f10238..84baf306 100644 --- a/src/page.c +++ b/src/page.c @@ -626,8 +626,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) + mi_assert_internal(page->key[0] != 0); mi_assert_internal(page->key[1] != 0); - mi_assert_internal(page->key[2] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); 
From b84b11b6a90ab26d581c4a569748cef80be38276 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 18:11:29 -0800 Subject: [PATCH 122/179] update readme --- readme.md | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/readme.md b/readme.md index 2cbd1baf..b6258cfc 100644 --- a/readme.md +++ b/readme.md @@ -56,8 +56,10 @@ Enjoy! ### Releases -* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and stronger -free list encoding in secure mode. +* 2020-01-XX, `v1.4.0`: stable release 1.4: delayed OS page reset for (much) better performance + with page reset enabled, more eager concurrent free, addition of STL allocator. +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger +free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. @@ -130,7 +132,7 @@ mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist with other allocators linked to the same program. If you use `cmake`, you can simply use: ``` -find_package(mimalloc 1.0 REQUIRED) +find_package(mimalloc 1.4 REQUIRED) ``` in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either: ``` @@ -144,7 +146,9 @@ to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the global `new` and `delete` operators. For convience, mimalloc provides -[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. +In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` +interface. You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -195,11 +199,15 @@ or via environment variables. - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. +- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce + memory fragmentation in long running (server) programs. If performance is impacted, + `MIMALLOC_RESET_DELAY=` can be set higher (100ms by default) to make the page + reset occur less frequently. - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory. 
+ can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. This is why it turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better @@ -207,11 +215,16 @@ or via environment variables. - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving - contiguous physical memory can take a long time when memory is fragmented. Still experimental. + contiguous physical memory can take a long time when memory is fragmented. + Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting + `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments + of a thread to not allocate in the huge OS pages; this prevents threads that are short lived + and allocate just a little to take up space in the huge OS page area (which cannot be reset). [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 + # Overriding Malloc Overriding the standard `malloc` can be done either _dynamically_ or _statically_. @@ -251,13 +264,13 @@ resolved to the _mimalloc_ library. Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this -(see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). +Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is +actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). ### Windows -On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Overriding on Windows is robust but requires that you link your program explicitly with +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). 
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to @@ -267,8 +280,8 @@ To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. For best performance on Windows with C++, it -is highly recommended to also override the `new`/`delete` operations (as described -in the introduction). +is highly recommended to also override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. From 6b7356a10aad3ff839689fbc2e50e11512d910b5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 18:12:01 -0800 Subject: [PATCH 123/179] make mimalloc compile with the highest warning level on msvc --- ide/vs2019/mimalloc.vcxproj | 2 +- include/mimalloc-internal.h | 1 + src/memory.c | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index f59de292..037e380d 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level2 + Level4 Disabled true true diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 500764ed..f039fc50 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -21,6 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif #if defined(_MSC_VER) +#pragma warning(disable:4127) // constant conditional due to MI_SECURE paths #define mi_decl_noinline __declspec(noinline) #define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) diff --git a/src/memory.c b/src/memory.c index ee84f755..9603a26f 100644 --- a/src/memory.c +++ b/src/memory.c @@ -80,7 +80,7 @@ typedef union mi_region_info_u { bool valid; bool is_large; short numa_node; - }; + } x; } mi_region_info_t; @@ -204,9 +204,9 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // and share it mi_region_info_t info; - info.valid = true; - info.is_large = region_large; - info.numa_node = _mi_os_numa_node(tld); + info.x.valid = true; + info.x.is_large = region_large; + info.x.numa_node = (short)_mi_os_numa_node(tld); mi_atomic_write(&r->info, info.value); // now make it available to others *region = r; return true; @@ -224,12 +224,12 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo // numa correct if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.numa_node; + int rnode = info.x.numa_node; if (rnode >= 0 && rnode != numa_node) return false; } // check allow-large - if (!allow_large && info.is_large) return false; + if (!allow_large && info.x.is_large) return false; return true; } @@ -278,11 +278,11 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_region_info_t info; info.value = mi_atomic_read(®ion->info); void* start = mi_atomic_read_ptr(®ion->start); - mi_assert_internal(!(info.is_large && !*is_large)); + mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); *is_zero = 
mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); - *is_large = info.is_large; + *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); @@ -292,7 +292,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo bool any_uncommitted; mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { - mi_assert_internal(!info.is_large); + mi_assert_internal(!info.x.is_large); bool commit_zero; _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); if (commit_zero) *is_zero = true; @@ -307,7 +307,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // unreset reset blocks if (mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { // some blocks are still reset - mi_assert_internal(!info.is_large); + mi_assert_internal(!info.x.is_large); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed @@ -412,7 +412,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re } // reset the blocks to reduce the working set. - if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset) + if (!info.x.is_large && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { From dc94d25890e965fb317ee15f4bca6e7b30b8898f Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 16 Jan 2020 19:39:14 -0800 Subject: [PATCH 124/179] update documentation --- doc/mimalloc-doc.h | 128 +++++-- docs/annotated.html | 2 +- docs/bench.html | 2 +- docs/build.html | 4 +- docs/classes.html | 2 +- docs/environment.html | 127 +++++++ docs/functions.html | 2 +- docs/functions_vars.html | 2 +- docs/group__aligned.html | 2 +- docs/group__analysis.html | 2 +- docs/group__extended.html | 349 ++++++++++++------ docs/group__extended.js | 16 +- docs/group__heap.html | 2 +- docs/group__malloc.html | 2 +- docs/group__options.html | 21 +- docs/group__options.js | 5 +- docs/group__posix.html | 2 +- docs/group__typed.html | 2 +- docs/group__zeroinit.html | 597 +++++++++++++++++++++++++++++++ docs/group__zeroinit.js | 14 + docs/index.html | 4 +- docs/mimalloc-doc_8h_source.html | 68 ++-- docs/modules.html | 2 +- docs/navtreeindex0.js | 41 ++- docs/overrides.html | 10 +- docs/pages.html | 2 +- docs/search/all_6.js | 18 +- docs/search/all_c.html | 30 ++ docs/search/all_c.js | 4 + docs/search/all_d.html | 30 ++ docs/search/all_d.js | 4 + docs/search/enumvalues_1.js | 3 +- docs/search/functions_0.js | 11 +- docs/search/functions_1.html | 30 ++ docs/search/functions_1.js | 4 + docs/search/groups_7.html | 30 ++ docs/search/groups_7.js | 4 + docs/search/pages_4.html | 30 ++ docs/search/pages_4.js | 4 + docs/search/typedefs_0.js | 4 +- docs/search/typedefs_1.html | 30 ++ docs/search/typedefs_1.js | 4 + docs/search/typedefs_2.html | 30 ++ docs/search/typedefs_2.js | 5 + docs/using.html | 8 +- 45 files changed, 1437 insertions(+), 256 deletions(-) create mode 100644 docs/environment.html create mode 100644 docs/group__zeroinit.html create mode 100644 docs/group__zeroinit.js create mode 100644 docs/search/all_c.html create mode 100644 
docs/search/all_c.js create mode 100644 docs/search/all_d.html create mode 100644 docs/search/all_d.js create mode 100644 docs/search/functions_1.html create mode 100644 docs/search/functions_1.js create mode 100644 docs/search/groups_7.html create mode 100644 docs/search/groups_7.js create mode 100644 docs/search/pages_4.html create mode 100644 docs/search/pages_4.js create mode 100644 docs/search/typedefs_1.html create mode 100644 docs/search/typedefs_1.js create mode 100644 docs/search/typedefs_2.html create mode 100644 docs/search/typedefs_2.js diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 5ad5a1e6..71cc1589 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -26,7 +26,7 @@ without code changes, for example, on Unix you can use it as: Notable aspects of the design include: -- __small and consistent__: the library is less than 3500 LOC using simple and +- __small and consistent__: the library is less than 6k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for @@ -297,10 +297,17 @@ size_t mi_good_size(size_t size); void mi_collect(bool force); /// Print the main statistics. -/// @param out Output function. Use \a NULL for outputting to \a stderr. +/// @param out Ignored, outputs to the registered output function or stderr by default. /// /// Most detailed when using a debug build. -void mi_stats_print(mi_output_fun* out); +void mi_stats_print(void* out); + +/// Print the main statistics. +/// @param out An output function or \a NULL for the default. +/// @param arg Optional argument passed to \a out (if not \a NULL) +/// +/// Most detailed when using a debug build. +void mi_stats_print(mi_output_fun* out, void* arg); /// Reset statistics. void mi_stats_reset(void); @@ -320,20 +327,23 @@ void mi_thread_init(void); void mi_thread_done(void); /// Print out heap statistics for this thread. -/// @param out Output function. Use \a NULL for outputting to \a stderr. +/// @param out An output function or \a NULL for the default. +/// @param arg Optional argument passed to \a out (if not \a NULL) /// /// Most detailed when using a debug build. -void mi_thread_stats_print(mi_output_fun* out); +void mi_thread_stats_print_out(mi_output_fun* out, void* arg); /// Type of deferred free functions. /// @param force If \a true all outstanding items should be freed. /// @param heartbeat A monotonically increasing count. +/// @param arg Argument that was passed at registration to hold extra state. /// /// @see mi_register_deferred_free -typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); +typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); /// Register a deferred free function. /// @param deferred_free Address of a deferred free-ing function or \a NULL to unregister. +/// @param arg Argument that will be passed on to the deferred free function. /// /// Some runtime systems use deferred free-ing, for example when using /// reference counting to limit the worst case free time. @@ -346,20 +356,22 @@ typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); /// to be called deterministically after some number of allocations /// (regardless of freeing or available free memory). /// At most one \a deferred_free function can be active. 
-void mi_register_deferred_free(mi_deferred_free_fun* deferred_free); +void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg); /// Type of output functions. /// @param msg Message to output. +/// @param arg Argument that was passed at registration to hold extra state. /// /// @see mi_register_output() -typedef void (mi_output_fun)(const char* msg); +typedef void (mi_output_fun)(const char* msg, void* arg); /// Register an output function. -/// @param out The output function, use `NULL` to output to stdout. +/// @param out The output function, use `NULL` to output to stderr. +/// @param arg Argument that will be passed on to the output function. /// /// The `out` function is called to output any information from mimalloc, /// like verbose or warning messages. -void mi_register_output(mi_output_fun* out) mi_attr_noexcept; +void mi_register_output(mi_output_fun* out, void* arg); /// Is a pointer part of our heap? /// @param p The pointer to check. @@ -367,18 +379,35 @@ void mi_register_output(mi_output_fun* out) mi_attr_noexcept; /// This function is relatively fast. bool mi_is_in_heap_region(const void* p); -/// Reserve \a pages of huge OS pages (1GiB) but stops after at most `max_secs` seconds. + +/// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes, +/// but stops after at most `timeout_msecs` seconds. /// @param pages The number of 1GiB pages to reserve. -/// @param max_secs Maximum number of seconds to try reserving. -/// @param pages_reserved If not \a NULL, it is set to the actual number of pages that were reserved. +/// @param numa_nodes The number of nodes do evenly divide the pages over, or 0 for using the actual number of NUMA nodes. +/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout. /// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. /// /// The reserved memory is used by mimalloc to satisfy allocations. -/// May quit before \a max_secs are expired if it estimates it will take more than -/// 1.5 times \a max_secs. The time limit is needed because on some operating systems +/// May quit before \a timeout_msecs are expired if it estimates it will take more than +/// 1.5 times \a timeout_msecs. The time limit is needed because on some operating systems /// it can take a long time to reserve contiguous memory if the physical memory is /// fragmented. -int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved); +int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs); + +/// Reserve \a pages of huge OS pages (1GiB) at a specific \a numa_node, +/// but stops after at most `timeout_msecs` seconds. +/// @param pages The number of 1GiB pages to reserve. +/// @param numa_node The NUMA node where the memory is reserved (start at 0). +/// @param timeout_msecs Maximum number of milli-seconds to try reserving, or 0 for no timeout. +/// @returns 0 if successfull, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out. +/// +/// The reserved memory is used by mimalloc to satisfy allocations. +/// May quit before \a timeout_msecs are expired if it estimates it will take more than +/// 1.5 times \a timeout_msecs. The time limit is needed because on some operating systems +/// it can take a long time to reserve contiguous memory if the physical memory is +/// fragmented. 
+int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs); + /// Is the C runtime \a malloc API redirected? /// @returns \a true if all malloc API calls are redirected to mimalloc. @@ -702,13 +731,14 @@ typedef enum mi_option_e { mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows) mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program. - mi_option_segment_cache, ///< The number of segments per thread to keep cached. - mi_option_page_reset, ///< Reset page memory when it becomes free. - mi_option_cache_reset, ///< Reset segment memory when a segment is cached. + mi_option_segment_cache, ///< The number of segments per thread to keep cached. + mi_option_page_reset, ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free. + mi_option_segment_reset, ///< Experimental + mi_option_reset_delay, ///< Delay in milli-seconds before resetting a page (100ms by default) + mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes mi_option_reset_decommits, ///< Experimental mi_option_eager_commit_delay, ///< Experimental - mi_option_segment_reset, ///< Experimental - mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory + mi_option_os_tag, ///< OS tag to assign to mimalloc'd memory _mi_option_last } mi_option_t; @@ -774,7 +804,7 @@ git clone https://github.com/microsoft/mimalloc ## Windows -Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build. +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -826,6 +856,7 @@ Notes: /*! \page using Using the library +### Build The preferred usage is including ``, linking with the shared- or static library, and using the `mi_malloc` API exclusively for allocation. For example, @@ -849,6 +880,19 @@ target_link_libraries(myapp PUBLIC mimalloc-static) ``` to link with the static library. See `test\CMakeLists.txt` for an example. +### C++ +For best performance in C++ programs, it is also recommended to override the +global `new` and `delete` operators. For convience, mimalloc provides +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. + +In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` +interface. For example: +``` +std::vector> vec; +vec.push_back(some_struct()); +``` + +### Statistics You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`) and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version): @@ -897,20 +941,33 @@ See \ref overrides for more info. /*! \page environment Environment Options -You can set further options either programmatically -(using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables. - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. 
- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. +- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce + memory fragmentation in long running (server) programs. If performance is impacted, + `MIMALLOC_RESET_DELAY=`_msecs_ can be set higher (100ms by default) to make the page + reset occur less frequently. - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs - to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). + to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes + the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. This is why it turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks. +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at + startup and can give quite a performance improvement on long running workloads. Usually it is better to not use + `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + contiguous physical memory can take a long time when memory is fragmented. + Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting + `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments + of a thread to not allocate in the huge OS pages; this prevents threads that are short lived + and allocate just a little to take up space in the huge OS page area (which cannot be reset). [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 @@ -960,25 +1017,28 @@ Note: unfortunately, at this time, dynamic overriding on macOS seems broken but ### Windows -On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Overriding on Windows is robust but requires that you link your program explicitly with +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available -in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). -The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc. 
+in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). +The redirection DLL ensures that all calls to the C runtime malloc API get redirected to +mimalloc (in `mimalloc-override.dll`). To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project -for an example on how to use this. +for an example on how to use this. For best performance on Windows with C++, it +is highly recommended to also override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic -overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected. +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. -(Note: in principle, it should be possible to patch existing executables -that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into -the import table (and putting `mimalloc-redirect.dll` in the same folder) +(Note: in principle, it is possible to patch existing executables +that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder) Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). + ## Static override On Unix systems, you can also statically link with _mimalloc_ to override the standard diff --git a/docs/annotated.html b/docs/annotated.html index dcc2e74d..4d2a8bcc 100644 --- a/docs/annotated.html +++ b/docs/annotated.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/bench.html b/docs/bench.html index 11b18550..6b289c04 100644 --- a/docs/bench.html +++ b/docs/bench.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/build.html b/docs/build.html index 3e870697..755aad88 100644 --- a/docs/build.html +++ b/docs/build.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -103,7 +103,7 @@ $(document).ready(function(){initNavTree('build.html','');});

Check out the sources from GitHub:

git clone https://github.com/microsoft/mimalloc

Windows

-

Open ide/vs2017/mimalloc.sln in Visual Studio 2017 and build. The mimalloc project builds a static library (in out/msvc-x64), while the mimalloc-override project builds a DLL for overriding malloc in the entire program.

+

Open ide/vs2019/mimalloc.sln in Visual Studio 2019 and build (or ide/vs2017/mimalloc.sln). The mimalloc project builds a static library (in out/msvc-x64), while the mimalloc-override project builds a DLL for overriding malloc in the entire program.

macOS, Linux, BSD, etc.

We use cmake1 as the build system:

> mkdir -p out/release
> cd out/release
> cmake ../..
> make

This builds the library as a shared (dynamic) library (.so or .dylib), a static library (.a), and as a single object file (.o).

diff --git a/docs/classes.html b/docs/classes.html index 760b28de..e5ea3ea8 100644 --- a/docs/classes.html +++ b/docs/classes.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/environment.html b/docs/environment.html new file mode 100644 index 00000000..1063654e --- /dev/null +++ b/docs/environment.html @@ -0,0 +1,127 @@ + + + + + + + +mi-malloc: Environment Options + + + + + + + + + + + + + + + + +
+
+ + + + + + + + +
+
mi-malloc +  1.4 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+
+
Environment Options
+
+
+

You can set further options either programmatically (using mi_option_set), or via environment variables.

+
    +
  • MIMALLOC_SHOW_STATS=1: show statistics when the program terminates.
  • +
  • MIMALLOC_VERBOSE=1: show verbose messages.
  • +
  • MIMALLOC_SHOW_ERRORS=1: show error and warning messages.
  • +
  • MIMALLOC_PAGE_RESET=1: reset (or purge) OS pages when not in use. This can reduce memory fragmentation in long running (server) programs. If performance is impacted, MIMALLOC_RESET_DELAY=_msecs_ can be set higher (100ms by default) to make the page reset occur less frequently.
  • +
  • MIMALLOC_LARGE_OS_PAGES=1: use large OS pages when available; for some workloads this can significantly improve performance. Use MIMALLOC_VERBOSE to check if the large OS pages are enabled – usually one needs to explicitly allow large OS pages (as on Windows and Linux). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use MIMALLOC_RESERVE_HUGE_OS_PAGES instead when possible).
  • +
  • MIMALLOC_EAGER_REGION_COMMIT=1: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions show in the working set even though usually just a small part is committed to physical memory. This is why it is turned off by default on Windows, as it does not look good in the task manager. However, in reality it is always better to turn it on as it improves performance and has no other drawbacks.
  • +
  • MIMALLOC_RESERVE_HUGE_OS_PAGES=N: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use MIMALLOC_LARGE_OS_PAGES in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented. Note that we usually need to explicitly enable huge OS pages (as on Windows and Linux). With huge OS pages, it may be beneficial to set MIMALLOC_EAGER_COMMIT_DELAY=N (usually with N as 1) to delay the initial N segments of a thread so they do not allocate in the huge OS pages; this prevents short lived threads that allocate only a little from taking up space in the huge OS page area (which cannot be reset).
  • +
+
+
+
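The same behavior can be requested from code. The minimal sketch below uses the option API documented in this manual together with mi_reserve_huge_os_pages_interleave as the programmatic counterpart of MIMALLOC_RESERVE_HUGE_OS_PAGES; the specific values (250ms delay, 4 pages) are illustrative, not recommendations.
```
#include <mimalloc.h>
#include <stdbool.h>
#include <stdio.h>

int main(void) {
  // Illustrative values, equivalent to MIMALLOC_PAGE_RESET=1 and MIMALLOC_RESET_DELAY=250.
  mi_option_enable(mi_option_page_reset, true);
  mi_option_set(mi_option_reset_delay, 250);   // milliseconds before a free page is reset

  // Counterpart of MIMALLOC_RESERVE_HUGE_OS_PAGES=4: reserve four 1GiB pages,
  // spread over the actual NUMA nodes (0) and without a timeout (0).
  if (mi_reserve_huge_os_pages_interleave(4, 0, 0) != 0) {
    fprintf(stderr, "huge page reservation failed (ENOMEM or ETIMEDOUT)\n");
  }

  void* p = mi_malloc(1024);
  mi_free(p);
  return 0;
}
```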
+ + + + diff --git a/docs/functions.html b/docs/functions.html index d2615a17..43e116ed 100644 --- a/docs/functions.html +++ b/docs/functions.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/functions_vars.html b/docs/functions_vars.html index b824832f..060a18d2 100644 --- a/docs/functions_vars.html +++ b/docs/functions_vars.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__aligned.html b/docs/group__aligned.html index 4980b45a..88c10eb4 100644 --- a/docs/group__aligned.html +++ b/docs/group__aligned.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__analysis.html b/docs/group__analysis.html index 3301fdef..b8d644aa 100644 --- a/docs/group__analysis.html +++ b/docs/group__analysis.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__extended.html b/docs/group__extended.html index 4d07f38d..85ea3624 100644 --- a/docs/group__extended.html +++ b/docs/group__extended.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -118,12 +118,12 @@ Macros - - - - - - + + + + + +

Typedefs

typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)
 Type of deferred free functions. More...
 
typedef void() mi_output_fun(const char *msg)
 Type of output functions. More...
 
typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
 Type of deferred free functions. More...
 
typedef void() mi_output_fun(const char *msg, void *arg)
 Type of output functions. More...
 
@@ -142,9 +142,12 @@ Functions - - - + + + + + + @@ -157,21 +160,24 @@ Functions - - - - - - - - - + + + + + + + + + - - - + + + + + + @@ -196,14 +202,14 @@ Functions

Typedef Documentation

- -

◆ mi_deferred_free_fun

+ +

◆ mi_deferred_free_fun

Functions

void mi_collect (bool force)
 Eagerly free memory. More...
 
void mi_stats_print (mi_output_fun *out)
 Print the main statistics. More...
 
void mi_stats_print (void *out)
 Print the main statistics. More...
 
void mi_stats_print (mi_output_fun *out, void *arg)
 Print the main statistics. More...
 
void mi_stats_reset (void)
 Reset statistics. More...
 
void mi_thread_done (void)
 Uninitialize mimalloc on a thread. More...
 
void mi_thread_stats_print (mi_output_fun *out)
 Print out heap statistics for this thread. More...
 
void mi_register_deferred_free (mi_deferred_free_fun *deferred_free)
 Register a deferred free function. More...
 
void mi_register_output (mi_output_fun *out) mi_attr_noexcept
 Register an output function. More...
 
void mi_thread_stats_print_out (mi_output_fun *out, void *arg)
 Print out heap statistics for this thread. More...
 
void mi_register_deferred_free (mi_deferred_free_fun *deferred_free, void *arg)
 Register a deferred free function. More...
 
void mi_register_output (mi_output_fun *out, void *arg)
 Register an output function. More...
 
bool mi_is_in_heap_region (const void *p)
 Is a pointer part of our heap? More...
 
int mi_reserve_huge_os_pages (size_t pages, double max_secs, size_t *pages_reserved)
 Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds. More...
 
int mi_reserve_huge_os_pages_interleave (size_t pages, size_t numa_nodes, size_t timeout_msecs)
 Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most timeout_msecs seconds. More...
 
int mi_reserve_huge_os_pages_at (size_t pages, int numa_node, size_t timeout_msecs)
 Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs seconds. More...
 
bool mi_is_redirected ()
 Is the C runtime malloc API redirected? More...
 
- +
typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
@@ -212,22 +218,23 @@ Functions
Parameters
- + +
forceIf true all outstanding items should be freed.
heartbeatA monotonically increasing count.
heartbeatA monotonically increasing count.
argArgument that was passed at registration to hold extra state.
-
See also
mi_register_deferred_free
+
See also
mi_register_deferred_free
- -

◆ mi_output_fun

+ +

◆ mi_output_fun

- +
typedef void() mi_output_fun(const char *msg)typedef void() mi_output_fun(const char *msg, void *arg)
@@ -235,11 +242,12 @@ Functions

Type of output functions.

Parameters
- + +
msgMessage to output.
msgMessage to output.
argArgument that was passed at registration to hold extra state.
-
See also
mi_register_output()
+
See also
mi_register_output()
@@ -375,8 +383,8 @@ Functions
- -

◆ mi_register_deferred_free()

+ +

◆ mi_register_deferred_free()

@@ -384,74 +392,14 @@ Functions void mi_register_deferred_free ( - mi_deferred_free_fun *  - deferred_free) - - - -
- -

Register a deferred free function.

-
Parameters
- - -
deferred_freeAddress of a deferred free-ing function or NULL to unregister.
-
-
-

Some runtime systems use deferred free-ing, for example when using reference counting to limit the worst case free time. Such systems can register (re-entrant) deferred free function to free more memory on demand. When the force parameter is true all possible memory should be freed. The per-thread heartbeat parameter is monotonically increasing and guaranteed to be deterministic if the program allocates deterministically. The deferred_free function is guaranteed to be called deterministically after some number of allocations (regardless of freeing or available free memory). At most one deferred_free function can be active.

- -
-
- -

◆ mi_register_output()

- -
-
- - - - - - - - -
void mi_register_output (mi_output_funout)
-
- -

Register an output function.

-
Parameters
- - -
outThe output function, use NULL to output to stdout.
-
-
-

The out function is called to output any information from mimalloc, like verbose or warning messages.

- -
-
- -

◆ mi_reserve_huge_os_pages()

- -
-
- - - - - - + + - - - - - - - - + + @@ -461,17 +409,145 @@ Functions
int mi_reserve_huge_os_pages (size_t pages, mi_deferred_free_fundeferred_free,
double max_secs,
size_t * pages_reserved void * arg 
-

Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds.

+

Register a deferred free function.

+
Parameters
+ + + +
deferred_freeAddress of a deferred free-ing function or NULL to unregister.
argArgument that will be passed on to the deferred free function.
+
+
+

Some runtime systems use deferred free-ing, for example when using reference counting to limit the worst case free time. Such systems can register (re-entrant) deferred free function to free more memory on demand. When the force parameter is true all possible memory should be freed. The per-thread heartbeat parameter is monotonically increasing and guaranteed to be deterministic if the program allocates deterministically. The deferred_free function is guaranteed to be called deterministically after some number of allocations (regardless of freeing or available free memory). At most one deferred_free function can be active.

+ +
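For illustration, a sketch of a deferred-free hook with the signature registered above; the runtime_t state and the policy of freeing on every 64th heartbeat are assumptions of this example, not part of mimalloc.
```
#include <mimalloc.h>
#include <stdbool.h>

typedef struct runtime_s { int pending; } runtime_t;   // hypothetical host-runtime state

// Matches mi_deferred_free_fun: called by mimalloc with the argument given at registration.
static void runtime_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
  runtime_t* rt = (runtime_t*)arg;
  if (force || (heartbeat % 64) == 0) {
    rt->pending = 0;   // stand-in for releasing objects queued by the runtime
  }
}

int main(void) {
  static runtime_t rt = { 0 };
  mi_register_deferred_free(runtime_deferred_free, &rt);   // pass NULL to unregister
  return 0;
}
```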
+
+ +

◆ mi_register_output()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void mi_register_output (mi_output_funout,
void * arg 
)
+
+ +

Register an output function.

+
Parameters
+ + + +
outThe output function, use NULL to output to stderr.
argArgument that will be passed on to the output function.
+
+
+

The out function is called to output any information from mimalloc, like verbose or warning messages.

+ +
+
+ +
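As an illustration of the two-argument registration, the sketch below forwards mimalloc messages to a caller-chosen FILE* passed through arg; the log file name and the use of mi_thread_stats_print_out are only examples.
```
#include <mimalloc.h>
#include <stdio.h>

// Matches mi_output_fun: write each message to the FILE* passed at registration.
static void file_output(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);
}

int main(void) {
  FILE* log = fopen("mimalloc.log", "w");   // illustrative destination
  if (log == NULL) log = stderr;
  mi_register_output(file_output, log);     // passing NULL restores the default (stderr)

  void* p = mi_malloc(64);
  mi_free(p);

  mi_thread_stats_print_out(file_output, log);  // statistics go through the same callback
  if (log != stderr) fclose(log);
  return 0;
}
```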

◆ mi_reserve_huge_os_pages_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
int mi_reserve_huge_os_pages_at (size_t pages,
int numa_node,
size_t timeout_msecs 
)
+
+ +

Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs seconds.

Parameters
- - + +
pagesThe number of 1GiB pages to reserve.
max_secsMaximum number of seconds to try reserving.
pages_reservedIf not NULL, it is set to the actual number of pages that were reserved.
numa_nodeThe NUMA node where the memory is reserved (start at 0).
timeout_msecsMaximum number of milli-seconds to try reserving, or 0 for no timeout.
Returns
0 if successful, ENOMEM if running out of memory, or ETIMEDOUT if timed out.
-

The reserved memory is used by mimalloc to satisfy allocations. May quit before max_secs are expired if it estimates it will take more than 1.5 times max_secs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

+

The reserved memory is used by mimalloc to satisfy allocations. May quit before timeout_msecs are expired if it estimates it will take more than 1.5 times timeout_msecs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

+ +
+
+ +

◆ mi_reserve_huge_os_pages_interleave()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
int mi_reserve_huge_os_pages_interleave (size_t pages,
size_t numa_nodes,
size_t timeout_msecs 
)
+
+ +

Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most timeout_msecs seconds.

+
Parameters
+ + + + +
pagesThe number of 1GiB pages to reserve.
numa_nodesThe number of nodes to evenly divide the pages over, or 0 for using the actual number of NUMA nodes.
timeout_msecsMaximum number of milli-seconds to try reserving, or 0 for no timeout.
+
+
+
Returns
0 if successful, ENOMEM if running out of memory, or ETIMEDOUT if timed out.
+

The reserved memory is used by mimalloc to satisfy allocations. May quit before timeout_msecs are expired if it estimates it will take more than 1.5 times timeout_msecs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

@@ -495,8 +571,8 @@ Functions
- -

◆ mi_stats_print()

+ +

◆ mi_stats_print() [1/2]

@@ -504,7 +580,7 @@ Functions void mi_stats_print ( - mi_output_fun *  + void *  out) @@ -514,7 +590,45 @@ Functions

Print the main statistics.

Parameters
- + +
outOutput function. Use NULL for outputting to stderr.
outIgnored, outputs to the registered output function or stderr by default.
+
+
+

Most detailed when using a debug build.

+ +
+
+ +

◆ mi_stats_print() [2/2]

+ +
+
+ + + + + + + + + + + + + + + + + + +
void mi_stats_print (mi_output_funout,
void * arg 
)
+
+ +

Print the main statistics.

+
Parameters
+ + +
outAn output function or NULL for the default.
argOptional argument passed to out (if not NULL)
@@ -584,18 +698,28 @@ Functions
- -

◆ mi_thread_stats_print()

+ +

◆ mi_thread_stats_print_out()

- + - - + + + + + + + + + + + +
void mi_thread_stats_print void mi_thread_stats_print_out (mi_output_funout)mi_output_funout,
void * arg 
)
@@ -603,7 +727,8 @@ Functions

Print out heap statistics for this thread.

Parameters
- + +
outOutput function. Use NULL for outputting to stderr.
outAn output function or NULL for the default.
argOptional argument passed to out (if not NULL)
diff --git a/docs/group__extended.js b/docs/group__extended.js index 00c73614..7152b518 100644 --- a/docs/group__extended.js +++ b/docs/group__extended.js @@ -1,22 +1,24 @@ var group__extended = [ [ "MI_SMALL_SIZE_MAX", "group__extended.html#ga1ea64283508718d9d645c38efc2f4305", null ], - [ "mi_deferred_free_fun", "group__extended.html#ga22213691c3ce5ab4d91b24aff1023529", null ], - [ "mi_output_fun", "group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f", null ], + [ "mi_deferred_free_fun", "group__extended.html#ga299dae78d25ce112e384a98b7309c5be", null ], + [ "mi_output_fun", "group__extended.html#gad823d23444a4b77a40f66bf075a98a0c", null ], [ "mi_collect", "group__extended.html#ga421430e2226d7d468529cec457396756", null ], [ "mi_good_size", "group__extended.html#gac057927cd06c854b45fe7847e921bd47", null ], [ "mi_is_in_heap_region", "group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6", null ], [ "mi_is_redirected", "group__extended.html#gaad25050b19f30cd79397b227e0157a3f", null ], [ "mi_malloc_small", "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99", null ], - [ "mi_register_deferred_free", "group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2", null ], - [ "mi_register_output", "group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5", null ], - [ "mi_reserve_huge_os_pages", "group__extended.html#ga2664f36a2dd557741c429cb799f04641", null ], + [ "mi_register_deferred_free", "group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece", null ], + [ "mi_register_output", "group__extended.html#gae5b17ff027cd2150b43a33040250cf3f", null ], + [ "mi_reserve_huge_os_pages_at", "group__extended.html#ga7795a13d20087447281858d2c771cca1", null ], + [ "mi_reserve_huge_os_pages_interleave", "group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50", null ], [ "mi_stats_merge", "group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1", null ], - [ "mi_stats_print", "group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01", null ], + [ "mi_stats_print", "group__extended.html#ga2d126e5c62d3badc35445e5d84166df2", null ], + [ "mi_stats_print", "group__extended.html#ga256cc6f13a142deabbadd954a217e228", null ], [ "mi_stats_reset", "group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99", null ], [ "mi_thread_done", "group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf", null ], [ "mi_thread_init", "group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17", null ], - [ "mi_thread_stats_print", "group__extended.html#ga489670a15d1a257ab4639e645ee4612a", null ], + [ "mi_thread_stats_print_out", "group__extended.html#gab1dac8476c46cb9eecab767eb40c1525", null ], [ "mi_usable_size", "group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee", null ], [ "mi_zalloc_small", "group__extended.html#ga220f29f40a44404b0061c15bc1c31152", null ] ]; \ No newline at end of file diff --git a/docs/group__heap.html b/docs/group__heap.html index 753aaba3..0973279a 100644 --- a/docs/group__heap.html +++ b/docs/group__heap.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__malloc.html b/docs/group__malloc.html index 6bd71d06..bee7b4eb 100644 --- a/docs/group__malloc.html +++ b/docs/group__malloc.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__options.html b/docs/group__options.html index a34a9307..71c7ba24 100644 --- a/docs/group__options.html +++ b/docs/group__options.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -123,11 +123,12 @@ Enumerations mi_option_segment_cache,
  mi_option_page_reset, -mi_option_cache_reset, -mi_option_reset_decommits, -mi_option_eager_commit_delay, +mi_option_segment_reset, +mi_option_reset_delay, +mi_option_use_numa_nodes,
-  mi_option_segment_reset, +  mi_option_reset_decommits, +mi_option_eager_commit_delay, mi_option_os_tag, _mi_option_last
@@ -183,16 +184,18 @@ Functions mi_option_segment_cache 

The number of segments per thread to keep cached.

-mi_option_page_reset 

Reset page memory when it becomes free.

+mi_option_page_reset 

Reset page memory after mi_option_reset_delay milliseconds when it becomes free.

-mi_option_cache_reset 

Reset segment memory when a segment is cached.

+mi_option_segment_reset 

Experimental.

+ +mi_option_reset_delay 

Delay in milli-seconds before resetting a page (100ms by default)

+ +mi_option_use_numa_nodes 

Pretend there are at most N NUMA nodes.

mi_option_reset_decommits 

Experimental.

mi_option_eager_commit_delay 

Experimental.

-mi_option_segment_reset 

Experimental.

- mi_option_os_tag 

OS tag to assign to mimalloc'd memory.

_mi_option_last  diff --git a/docs/group__options.js b/docs/group__options.js index 4bf52d54..1d84ea8b 100644 --- a/docs/group__options.js +++ b/docs/group__options.js @@ -10,10 +10,11 @@ var group__options = [ "mi_option_reserve_huge_os_pages", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2", null ], [ "mi_option_segment_cache", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1", null ], [ "mi_option_page_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968", null ], - [ "mi_option_cache_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07", null ], + [ "mi_option_segment_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d", null ], + [ "mi_option_reset_delay", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5", null ], + [ "mi_option_use_numa_nodes", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74", null ], [ "mi_option_reset_decommits", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536", null ], [ "mi_option_eager_commit_delay", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c", null ], - [ "mi_option_segment_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d", null ], [ "mi_option_os_tag", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf", null ], [ "_mi_option_last", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a", null ] ] ], diff --git a/docs/group__posix.html b/docs/group__posix.html index b9cf0b52..65e8ff7e 100644 --- a/docs/group__posix.html +++ b/docs/group__posix.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__typed.html b/docs/group__typed.html index 8ea0f095..cf5ac5d1 100644 --- a/docs/group__typed.html +++ b/docs/group__typed.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/group__zeroinit.html b/docs/group__zeroinit.html new file mode 100644 index 00000000..28983138 --- /dev/null +++ b/docs/group__zeroinit.html @@ -0,0 +1,597 @@ + + + + + + + +mi-malloc: Zero initialized re-allocation + + + + + + + + + + + + + + + + +
+
+ + + + + + + + +
+
mi-malloc +  1.4 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
Zero initialized re-allocation
+
+
+ +

The zero-initialized re-allocations are only valid on memory that was originally allocated with zero initialization too. +More...

+ + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

void * mi_rezalloc (void *p, size_t newsize)
 
void * mi_rezalloc_aligned (void *p, size_t newsize, size_t alignment)
 
void * mi_rezalloc_aligned_at (void *p, size_t newsize, size_t alignment, size_t offset)
 
void * mi_recalloc_aligned (void *p, size_t newcount, size_t size, size_t alignment)
 
void * mi_recalloc_aligned_at (void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
 
void * mi_heap_rezalloc (mi_heap_t *heap, void *p, size_t newsize)
 
void * mi_heap_recalloc (mi_heap_t *heap, void *p, size_t newcount, size_t size)
 
void * mi_heap_rezalloc_aligned (mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
 
void * mi_heap_rezalloc_aligned_at (mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
 
void * mi_heap_recalloc_aligned (mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
 
void * mi_heap_recalloc_aligned_at (mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
 
+

Detailed Description

+

The zero-initialized re-allocations are only valid on memory that was originally allocated with zero initialization too.

+

e.g. mi_calloc, mi_zalloc, mi_zalloc_aligned etc. see https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992

+
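A minimal sketch of this rule: a block grown with mi_rezalloc must come from a zero-initializing allocation such as mi_zalloc; the sizes used below are arbitrary.
```
#include <mimalloc.h>
#include <assert.h>

int main(void) {
  // Valid: the original block was zero initialized, so the newly added bytes are zero too.
  char* p = (char*)mi_zalloc(100);
  p = (char*)mi_rezalloc(p, 200);
  assert(p != NULL && p[150] == 0);   // bytes beyond the old size are zero
  mi_free(p);

  // Invalid: memory from mi_malloc has no zero-initialization guarantee, so it must not
  // be passed to the mi_rezalloc/mi_recalloc family.
  // char* q = (char*)mi_malloc(100);
  // q = (char*)mi_rezalloc(q, 200);
  return 0;
}
```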

Function Documentation

+ +

◆ mi_heap_recalloc()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_recalloc (mi_heap_theap,
void * p,
size_t newcount,
size_t size 
)
+
+ +
+
+ +

◆ mi_heap_recalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_recalloc_aligned (mi_heap_theap,
void * p,
size_t newcount,
size_t size,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_heap_recalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_recalloc_aligned_at (mi_heap_theap,
void * p,
size_t newcount,
size_t size,
size_t alignment,
size_t offset 
)
+
+ +
+
+ +

◆ mi_heap_rezalloc()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_rezalloc (mi_heap_theap,
void * p,
size_t newsize 
)
+
+ +
+
+ +

◆ mi_heap_rezalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_rezalloc_aligned (mi_heap_theap,
void * p,
size_t newsize,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_heap_rezalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_heap_rezalloc_aligned_at (mi_heap_theap,
void * p,
size_t newsize,
size_t alignment,
size_t offset 
)
+
+ +
+
+ +

◆ mi_recalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_recalloc_aligned (void * p,
size_t newcount,
size_t size,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_recalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_recalloc_aligned_at (void * p,
size_t newcount,
size_t size,
size_t alignment,
size_t offset 
)
+
+ +
+
+ +

◆ mi_rezalloc()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void* mi_rezalloc (void * p,
size_t newsize 
)
+
+ +
+
+ +

◆ mi_rezalloc_aligned()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_rezalloc_aligned (void * p,
size_t newsize,
size_t alignment 
)
+
+ +
+
+ +

◆ mi_rezalloc_aligned_at()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_rezalloc_aligned_at (void * p,
size_t newsize,
size_t alignment,
size_t offset 
)
+
+ +
+
+
+
+ + + + diff --git a/docs/group__zeroinit.js b/docs/group__zeroinit.js new file mode 100644 index 00000000..b9297d21 --- /dev/null +++ b/docs/group__zeroinit.js @@ -0,0 +1,14 @@ +var group__zeroinit = +[ + [ "mi_heap_recalloc", "group__zeroinit.html#ga8648c5fbb22a80f0262859099f06dfbd", null ], + [ "mi_heap_recalloc_aligned", "group__zeroinit.html#ga9f3f999396c8f77ca5e80e7b40ac29e3", null ], + [ "mi_heap_recalloc_aligned_at", "group__zeroinit.html#ga496452c96f1de8c500be9fddf52edaf7", null ], + [ "mi_heap_rezalloc", "group__zeroinit.html#gacfad83f14eb5d6a42a497a898e19fc76", null ], + [ "mi_heap_rezalloc_aligned", "group__zeroinit.html#ga375fa8a611c51905e592d5d467c49664", null ], + [ "mi_heap_rezalloc_aligned_at", "group__zeroinit.html#gac90da54fa7e5d10bdc97ce0b51dce2eb", null ], + [ "mi_recalloc_aligned", "group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f", null ], + [ "mi_recalloc_aligned_at", "group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9", null ], + [ "mi_rezalloc", "group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6", null ], + [ "mi_rezalloc_aligned", "group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0", null ], + [ "mi_rezalloc_aligned_at", "group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1", null ] +]; \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index bf758c3c..0efc9c09 100644 --- a/docs/index.html +++ b/docs/index.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -105,7 +105,7 @@ $(document).ready(function(){initNavTree('index.html','');});

This is the API documentation of the mimalloc allocator (pronounced "me-malloc") – a general purpose allocator with excellent performance characteristics. Initially developed by Daan Leijen for the run-time systems of the Koka and Lean languages.

It is a drop-in replacement for malloc and can be used in other programs without code changes, for example, on Unix you can use it as:

> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram

Notable aspects of the design include:

    -
  • small and consistent: the library is less than 3500 LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
  • +
  • small and consistent: the library is less than 6k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
  • free list sharding: the big idea: instead of one big free list (per size class) we have many smaller lists per memory "page" which both reduces fragmentation and increases locality – things that are allocated close in time get allocated close in memory. (A memory "page" in mimalloc contains blocks of one size class and is usually 64KiB on a 64-bit system).
  • eager page reset: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") reducing (real) memory pressure and fragmentation, especially in long running programs.
  • secure: mimalloc can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various heap vulnerabilities. The performance penalty is only around 3% on average over our benchmarks.
  • diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index 3a235533..c240f151 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -37,7 +37,7 @@ Logo
    mi-malloc -  1.0 +  1.4
    @@ -102,35 +102,30 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
    mimalloc-doc.h
-
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(mi_output_fun* out);
304 
306 void mi_stats_reset(void);
307 
309 void mi_stats_merge(void);
310 
314 void mi_thread_init(void);
315 
320 void mi_thread_done(void);
321 
327 
333 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat);
334 
350 
355 typedef void (mi_output_fun)(const char* msg);
356 
362 void mi_register_output(mi_output_fun* out) mi_attr_noexcept;
363 
368 bool mi_is_in_heap_region(const void* p);
369 
381 int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved);
382 
387 bool mi_is_redirected();
388 
389 
391 
392 // ------------------------------------------------------
393 // Aligned allocation
394 // ------------------------------------------------------
395 
401 
414 void* mi_malloc_aligned(size_t size, size_t alignment);
415 void* mi_zalloc_aligned(size_t size, size_t alignment);
416 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
417 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
418 
429 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
430 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
431 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
432 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
433 
435 
441 
446 struct mi_heap_s;
447 
452 typedef struct mi_heap_s mi_heap_t;
453 
456 
464 void mi_heap_delete(mi_heap_t* heap);
465 
473 void mi_heap_destroy(mi_heap_t* heap);
474 
479 
483 
490 
492 void mi_heap_collect(mi_heap_t* heap, bool force);
493 
496 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
497 
501 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
502 
505 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
506 
509 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
510 
513 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
514 
517 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
518 
521 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
522 
525 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
526 
527 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
528 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
529 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
530 
531 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
532 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
533 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
534 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
535 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
536 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
537 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
538 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
539 
541 
542 
551 
552 void* mi_rezalloc(void* p, size_t newsize);
553 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
554 
555 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
556 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
557 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
558 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
559 
560 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
561 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
562 
563 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
564 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
565 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
566 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
567 
569 
575 
587 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
588 
590 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
591 
593 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
594 
596 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
597 
599 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
600 
602 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
603 
605 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
606 
608 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
609 
611 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
612 
614 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
615 
617 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
618 
620 
626 
633 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
634 
643 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
644 
652 bool mi_check_owned(const void* p);
653 
656 typedef struct mi_heap_area_s {
657  void* blocks;
658  size_t reserved;
659  size_t committed;
660  size_t used;
661  size_t block_size;
663 
671 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
672 
684 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
685 
687 
693 
695 typedef enum mi_option_e {
696  // stable options
700  // the following options are experimental
713 } mi_option_t;
714 
715 
716 bool mi_option_enabled(mi_option_t option);
717 void mi_option_enable(mi_option_t option, bool enable);
718 void mi_option_enable_default(mi_option_t option, bool enable);
719 
720 long mi_option_get(mi_option_t option);
721 void mi_option_set(mi_option_t option, long value);
722 void mi_option_set_default(mi_option_t option, long value);
723 
724 
726 
733 
734 void* mi_recalloc(void* p, size_t count, size_t size);
735 size_t mi_malloc_size(const void* p);
736 size_t mi_malloc_usable_size(const void *p);
737 
739 void mi_cfree(void* p);
740 
741 int mi_posix_memalign(void** p, size_t alignment, size_t size);
742 int mi__posix_memalign(void** p, size_t alignment, size_t size);
743 void* mi_memalign(size_t alignment, size_t size);
744 void* mi_valloc(size_t size);
745 
746 void* mi_pvalloc(size_t size);
747 void* mi_aligned_alloc(size_t alignment, size_t size);
748 void* mi_reallocarray(void* p, size_t count, size_t size);
749 
750 void mi_free_size(void* p, size_t size);
751 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
752 void mi_free_aligned(void* p, size_t alignment);
753 
755 void* mi_new(std::size_t n) noexcept(false);
756 
758 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
759 
761 void* mi_new_nothrow(size_t n);
762 
764 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
765 
767 
void mi_option_enable_default(mi_option_t option, bool enable)
+
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
380 bool mi_is_in_heap_region(const void* p);
381 
382 
395 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
396 
409 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
410 
411 
416 bool mi_is_redirected();
417 
418 
420 
421 // ------------------------------------------------------
422 // Aligned allocation
423 // ------------------------------------------------------
424 
430 
443 void* mi_malloc_aligned(size_t size, size_t alignment);
444 void* mi_zalloc_aligned(size_t size, size_t alignment);
445 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
446 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
447 
458 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
459 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
460 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
461 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
462 
464 
470 
475 struct mi_heap_s;
476 
481 typedef struct mi_heap_s mi_heap_t;
482 
485 
493 void mi_heap_delete(mi_heap_t* heap);
494 
502 void mi_heap_destroy(mi_heap_t* heap);
503 
508 
512 
519 
521 void mi_heap_collect(mi_heap_t* heap, bool force);
522 
525 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
526 
530 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
531 
534 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
535 
538 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
539 
542 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
543 
546 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
547 
550 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
551 
554 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
555 
556 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
557 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
558 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
559 
560 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
561 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
562 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
563 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
564 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
565 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
566 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
567 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
568 
570 
571 
580 
581 void* mi_rezalloc(void* p, size_t newsize);
582 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
583 
584 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
585 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
586 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
587 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
588 
589 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
590 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
591 
592 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
593 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
594 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
595 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
596 
598 
604 
616 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
617 
619 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
620 
622 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
623 
625 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
626 
628 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
629 
631 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
632 
634 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
635 
637 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
638 
640 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
641 
643 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
644 
646 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
647 
649 
655 
662 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
663 
672 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
673 
681 bool mi_check_owned(const void* p);
682 
685 typedef struct mi_heap_area_s {
686  void* blocks;
687  size_t reserved;
688  size_t committed;
689  size_t used;
690  size_t block_size;
692 
700 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
701 
713 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
714 
716 
722 
724 typedef enum mi_option_e {
725  // stable options
729  // the following options are experimental
743 } mi_option_t;
744 
745 
746 bool mi_option_enabled(mi_option_t option);
747 void mi_option_enable(mi_option_t option, bool enable);
748 void mi_option_enable_default(mi_option_t option, bool enable);
749 
750 long mi_option_get(mi_option_t option);
751 void mi_option_set(mi_option_t option, long value);
752 void mi_option_set_default(mi_option_t option, long value);
753 
754 
756 
763 
764 void* mi_recalloc(void* p, size_t count, size_t size);
765 size_t mi_malloc_size(const void* p);
766 size_t mi_malloc_usable_size(const void *p);
767 
769 void mi_cfree(void* p);
770 
771 int mi_posix_memalign(void** p, size_t alignment, size_t size);
772 int mi__posix_memalign(void** p, size_t alignment, size_t size);
773 void* mi_memalign(size_t alignment, size_t size);
774 void* mi_valloc(size_t size);
775 
776 void* mi_pvalloc(size_t size);
777 void* mi_aligned_alloc(size_t alignment, size_t size);
778 void* mi_reallocarray(void* p, size_t count, size_t size);
779 
780 void mi_free_size(void* p, size_t size);
781 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
782 void mi_free_aligned(void* p, size_t alignment);
783 
785 void* mi_new(std::size_t n) noexcept(false);
786 
788 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
789 
791 void* mi_new_nothrow(size_t n);
792 
794 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
795 
797 
void mi_option_enable_default(mi_option_t option, bool enable)
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
void * mi_recalloc_aligned_at(void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
void mi_stats_print(mi_output_fun *out)
Print the main statistics.
void mi_stats_reset(void)
Reset statistics.
void * mi_heap_realloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
void * mi_recalloc(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes, with extra memory initialized to zero.
void * mi_mallocn(size_t count, size_t size)
Allocate count elements of size bytes.
size_t mi_malloc_size(const void *p)
-
Reset segment memory when a segment is cached.
Definition: mimalloc-doc.h:707
int mi_posix_memalign(void **p, size_t alignment, size_t size)
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
-
void() mi_output_fun(const char *msg)
Type of output functions.
Definition: mimalloc-doc.h:355
-
void mi_register_output(mi_output_fun *out) mi_attr_noexcept
Register an output function.
void mi_option_set_default(mi_option_t option, long value)
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_rezalloc(void *p, size_t newsize)
-
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:701
+
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:730
void * mi_heap_zalloc(mi_heap_t *heap, size_t size)
Allocate zero-initialized in a specific heap.
void mi_option_set(mi_option_t option, long value)
-
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free)
Register a deferred free function.
-
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:702
+
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:731
void mi_cfree(void *p)
Just as free but also checks if the pointer p belongs to our heap.
void * mi_recalloc_aligned(void *p, size_t newcount, size_t size, size_t alignment)
-
Definition: mimalloc-doc.h:712
+
Definition: mimalloc-doc.h:742
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:657
+
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:686
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
@@ -146,35 +141,36 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void * mi_heap_rezalloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_zalloc(size_t size)
Allocate zero-initialized size bytes.
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
-
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:705
+
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:734
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
void * mi_new(std::size_t n) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
-
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:661
+
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:690
void * mi_reallocarray(void *p, size_t count, size_t size)
+
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most t...
+
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:342
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
void mi_option_enable(mi_option_t option, bool enable)
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
-
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:704
-
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t *pages_reserved)
Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds.
+
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:733
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
void mi_free_size_aligned(void *p, size_t size, size_t alignment)
void * mi_rezalloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
Reset page memory when it becomes free.
Definition: mimalloc-doc.h:706
+
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:735
void mi_thread_done(void)
Uninitialize mimalloc on a thread.
bool mi_heap_visit_blocks(const mi_heap_t *heap, bool visit_all_blocks, mi_block_visit_fun *visitor, void *arg)
Visit all areas and blocks in a heap.
-
void mi_thread_stats_print(mi_output_fun *out)
Print out heap statistics for this thread.
+
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:738
void * mi_malloc(size_t size)
Allocate size bytes.
bool mi_option_enabled(mi_option_t option)
-
Experimental.
Definition: mimalloc-doc.h:708
+
Experimental.
Definition: mimalloc-doc.h:739
char * mi_heap_strndup(mi_heap_t *heap, const char *s, size_t n)
Duplicate a string of at most length n in a specific heap.
-
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:671
+
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:700
void * mi_heap_recalloc(mi_heap_t *heap, void *p, size_t newcount, size_t size)
void * mi_heap_malloc_aligned_at(mi_heap_t *heap, size_t size, size_t alignment, size_t offset)
char * mi_realpath(const char *fname, char *resolved_name)
Resolve a file path name.
-
Print error messages to stderr.
Definition: mimalloc-doc.h:698
-
Experimental.
Definition: mimalloc-doc.h:710
+
Print error messages to stderr.
Definition: mimalloc-doc.h:727
+
Experimental.
Definition: mimalloc-doc.h:736
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
void * mi_memalign(size_t alignment, size_t size)
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
return NULL on failure.
@@ -183,35 +179,40 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
void * mi_heap_recalloc_aligned_at(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
Print verbose messages to stderr.
Definition: mimalloc-doc.h:699
+
Print verbose messages to stderr.
Definition: mimalloc-doc.h:728
void * mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset)
void * mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset)
Allocate size bytes aligned by alignment at a specified offset.
void mi_heap_delete(mi_heap_t *heap)
Delete a previously allocated heap.
-
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:711
+
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:741
mi_heap_t * mi_heap_get_default()
Get the default heap that is used for mi_malloc() et al.
+
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs ...
void * mi_aligned_alloc(size_t alignment, size_t size)
void * mi_valloc(size_t size)
void mi_thread_init(void)
Initialize mimalloc on a thread.
size_t mi_good_size(size_t size)
Return the used allocation size.
-
Experimental.
Definition: mimalloc-doc.h:709
+
void mi_stats_print(void *out)
Print the main statistics.
+
Experimental.
Definition: mimalloc-doc.h:740
void * mi_heap_recalloc_aligned(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
void * mi_heap_mallocn(mi_heap_t *heap, size_t count, size_t size)
Allocate count elements in a specific heap.
-
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:656
-
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:697
+
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:685
+
void mi_thread_stats_print_out(mi_output_fun *out, void *arg)
Print out heap statistics for this thread.
+
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:726
void * mi_zalloc_aligned(size_t size, size_t alignment)
-
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:658
-
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:452
-
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:660
-
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)
Type of deferred free functions.
Definition: mimalloc-doc.h:333
+
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:687
+
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:481
+
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:689
+
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
-
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:703
+
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:732
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
+
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
void * mi_heap_realloc(mi_heap_t *heap, void *p, size_t newsize)
size_t mi_malloc_usable_size(const void *p)
+
void() mi_output_fun(const char *msg, void *arg)
Type of output functions.
Definition: mimalloc-doc.h:366
char * mi_strdup(const char *s)
Allocate and duplicate a string.
void * mi_heap_realloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_reallocf(void *p, size_t newsize)
Re-allocate memory to newsize bytes, freeing p on failure.
@@ -223,10 +224,11 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
long mi_option_get(mi_option_t option)
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
+
Delay in milli-seconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:737
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
-
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:659
-
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:695
+
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:688
+
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:724
bool mi_heap_check_owned(mi_heap_t *heap, const void *p)
Check safely if any pointer is part of a heap.
mi_heap_t * mi_heap_set_default(mi_heap_t *heap)
Set the default heap to use for mi_malloc() et al.
diff --git a/docs/modules.html b/docs/modules.html index ca18e1eb..0bc6036d 100644 --- a/docs/modules.html +++ b/docs/modules.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index 90be7d78..d1b0e072 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -29,25 +29,27 @@ var NAVTREEINDEX0 = "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1], "group__analysis.html#structmi__heap__area__t":[5,6,0], "group__extended.html":[5,1], -"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,17], -"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,14], +"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,19], +"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,16], "group__extended.html#ga1ea64283508718d9d645c38efc2f4305":[5,1,0], -"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,18], -"group__extended.html#ga22213691c3ce5ab4d91b24aff1023529":[5,1,1], -"group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2":[5,1,8], -"group__extended.html#ga2664f36a2dd557741c429cb799f04641":[5,1,10], -"group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f":[5,1,2], -"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,13], +"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,20], +"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,14], +"group__extended.html#ga299dae78d25ce112e384a98b7309c5be":[5,1,1], +"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,13], +"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,11], +"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,8], +"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,15], "group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,3], -"group__extended.html#ga489670a15d1a257ab4639e645ee4612a":[5,1,16], "group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,5], "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,7], -"group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5":[5,1,9], -"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,11], -"group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01":[5,1,12], +"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,10], +"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,12], "group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,6], +"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,18], "group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,4], -"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,15], +"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,2], +"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,9], +"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,17], "group__heap.html":[5,3], "group__heap.html#ga00e95ba1e01acac3cfd95bb7a357a6f0":[5,3,20], "group__heap.html#ga08ca6419a5c057a4d965868998eef487":[5,3,3], @@ -99,19 +101,20 @@ var NAVTREEINDEX0 = "group__options.html#gaf84921c32375e25754dc2ee6a911fa60":[5,7,5], "group__options.html#gafebf7ed116adb38ae5218bc3ce06884c":[5,7,0], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda":[5,7,0,0], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,11], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74":[5,7,0,11], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5":[5,7,0,10], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,13], 
"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b":[5,7,0,3], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1":[5,7,0,7], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad":[5,7,0,4], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4192d491200d0055df0554d4cf65054e":[5,7,0,5], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf":[5,7,0,13], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a":[5,7,0,14], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf":[5,7,0,14], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a":[5,7,0,15], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777":[5,7,0,2], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07":[5,7,0,9], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536":[5,7,0,10], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536":[5,7,0,12], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2":[5,7,0,6], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968":[5,7,0,8], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,12], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,9], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0":[5,7,0,1], "group__posix.html":[5,8], "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17":[5,8,7], diff --git a/docs/overrides.html b/docs/overrides.html index 74ef9dbd..3b5d9bd3 100644 --- a/docs/overrides.html +++ b/docs/overrides.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -118,10 +118,10 @@ $(document).ready(function(){initNavTree('overrides.html','');});

Note that certain security restrictions may apply when doing this from the shell.

Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this (see issue #50).

Windows

-

On Windows you need to link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Moreover, you need to ensure the mimalloc-redirect.dll (or mimalloc-redirect32.dll) is available in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc.

-

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this.

-

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc successfully redirected.

-

(Note: in principle, it should be possible to patch existing executables that are linked with the dynamic C runtime (ucrtbase.dll) by just putting the mimalloc DLL into the import table (and putting mimalloc-redirect.dll in the same folder) Such patching can be done for example with CFF Explorer).

+

Overriding on Windows is robust but requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Moreover, you need to ensure the mimalloc-redirect.dll (or mimalloc-redirect32.dll) is available in the same folder as the main mimalloc-override.dll at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in mimalloc-override.dll).

+

 To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this. For best performance on Windows with C++, it is highly recommended to also override the new/delete operations (by including mimalloc-new-delete.h in a single(!) source file in your project).

+

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc was successfully redirected.

+

 (Note: in principle, it is possible to patch existing executables that are linked with the dynamic C runtime (ucrtbase.dll) by just putting the mimalloc-override.dll into the import table (and putting mimalloc-redirect.dll in the same folder); such patching can be done for example with CFF Explorer.)
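As a minimal sketch of the above (assuming a program that includes mimalloc.h and links against the mimalloc DLL as described), referencing the mimalloc API from main is enough to force the DLL, and with it the redirection, to be loaded:

#include <mimalloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  // Calling any mimalloc API, such as mi_version(), ensures the mimalloc DLL is loaded.
  printf("mimalloc version: %d\n", mi_version());
  void* p = malloc(100);   // with mimalloc-redirect.dll present, this call is redirected to mimalloc
  free(p);
  return 0;
}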

Static override

On Unix systems, you can also statically link with mimalloc to override the standard malloc interface. The recommended way is to link the final program with the mimalloc single object file (mimalloc-override.o). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the mimalloc library, link it as the first object file. For example:

gcc -o myprogram mimalloc-override.o myfile1.c ...

List of Overrides:

diff --git a/docs/pages.html b/docs/pages.html index d0ee9f7a..ad5549bf 100644 --- a/docs/pages.html +++ b/docs/pages.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/search/all_6.js b/docs/search/all_6.js index 2edb9986..cc7a26ec 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -10,7 +10,7 @@ var searchData= ['mi_5fcfree',['mi_cfree',['../group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7',1,'mimalloc-doc.h']]], ['mi_5fcheck_5fowned',['mi_check_owned',['../group__analysis.html#ga628c237489c2679af84a4d0d143b3dd5',1,'mimalloc-doc.h']]], ['mi_5fcollect',['mi_collect',['../group__extended.html#ga421430e2226d7d468529cec457396756',1,'mimalloc-doc.h']]], - ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga22213691c3ce5ab4d91b24aff1023529',1,'mimalloc-doc.h']]], + ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], ['mi_5fexpand',['mi_expand',['../group__malloc.html#gaaee66a1d483c3e28f585525fb96707e4',1,'mimalloc-doc.h']]], ['mi_5ffree',['mi_free',['../group__malloc.html#gaf2c7b89c327d1f60f59e68b9ea644d95',1,'mimalloc-doc.h']]], ['mi_5ffree_5faligned',['mi_free_aligned',['../group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9',1,'mimalloc-doc.h']]], @@ -76,7 +76,6 @@ var searchData= ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], - ['mi_5foption_5fcache_5freset',['mi_option_cache_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -89,6 +88,7 @@ var searchData= ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], ['mi_5foption_5freserve_5fhuge_5fos_5fpages',['mi_option_reserve_huge_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2',1,'mimalloc-doc.h']]], ['mi_5foption_5freset_5fdecommits',['mi_option_reset_decommits',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536',1,'mimalloc-doc.h']]], + ['mi_5foption_5freset_5fdelay',['mi_option_reset_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5fcache',['mi_option_segment_cache',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fset',['mi_option_set',['../group__options.html#gaf84921c32375e25754dc2ee6a911fa60',1,'mimalloc-doc.h']]], @@ -96,8 
+96,9 @@ var searchData= ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], ['mi_5foption_5ft',['mi_option_t',['../group__options.html#gafebf7ed116adb38ae5218bc3ce06884c',1,'mimalloc-doc.h']]], + ['mi_5foption_5fuse_5fnuma_5fnodes',['mi_option_use_numa_nodes',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74',1,'mimalloc-doc.h']]], ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]], - ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f',1,'mimalloc-doc.h']]], + ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]], ['mi_5fposix_5fmemalign',['mi_posix_memalign',['../group__posix.html#gacff84f226ba9feb2031b8992e5579447',1,'mimalloc-doc.h']]], ['mi_5fpvalloc',['mi_pvalloc',['../group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e',1,'mimalloc-doc.h']]], ['mi_5frealloc',['mi_realloc',['../group__malloc.html#gaf11eb497da57bdfb2de65eb191c69db6',1,'mimalloc-doc.h']]], @@ -111,21 +112,22 @@ var searchData= ['mi_5frecalloc',['mi_recalloc',['../group__malloc.html#ga23a0fbb452b5dce8e31fab1a1958cacc',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], - ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2',1,'mimalloc-doc.h']]], - ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5',1,'mimalloc-doc.h']]], - ['mi_5freserve_5fhuge_5fos_5fpages',['mi_reserve_huge_os_pages',['../group__extended.html#ga2664f36a2dd557741c429cb799f04641',1,'mimalloc-doc.h']]], + ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], ['mi_5frezalloc',['mi_rezalloc',['../group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fsmall_5fsize_5fmax',['MI_SMALL_SIZE_MAX',['../group__extended.html#ga1ea64283508718d9d645c38efc2f4305',1,'mimalloc-doc.h']]], 
['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], ['mi_5fthread_5finit',['mi_thread_init',['../group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17',1,'mimalloc-doc.h']]], - ['mi_5fthread_5fstats_5fprint',['mi_thread_stats_print',['../group__extended.html#ga489670a15d1a257ab4639e645ee4612a',1,'mimalloc-doc.h']]], + ['mi_5fthread_5fstats_5fprint_5fout',['mi_thread_stats_print_out',['../group__extended.html#gab1dac8476c46cb9eecab767eb40c1525',1,'mimalloc-doc.h']]], ['mi_5fusable_5fsize',['mi_usable_size',['../group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee',1,'mimalloc-doc.h']]], ['mi_5fvalloc',['mi_valloc',['../group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b',1,'mimalloc-doc.h']]], ['mi_5fzalloc',['mi_zalloc',['../group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000',1,'mimalloc-doc.h']]], diff --git a/docs/search/all_c.html b/docs/search/all_c.html new file mode 100644 index 00000000..3de15867 --- /dev/null +++ b/docs/search/all_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/all_c.js b/docs/search/all_c.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/all_c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/all_d.html b/docs/search/all_d.html new file mode 100644 index 00000000..a2d5bd7e --- /dev/null +++ b/docs/search/all_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/all_d.js b/docs/search/all_d.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/all_d.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/enumvalues_1.js b/docs/search/enumvalues_1.js index 3ed91631..3b712708 100644 --- a/docs/search/enumvalues_1.js +++ b/docs/search/enumvalues_1.js @@ -1,6 +1,5 @@ var searchData= [ - ['mi_5foption_5fcache_5freset',['mi_option_cache_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -9,9 +8,11 @@ var searchData= ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], ['mi_5foption_5freserve_5fhuge_5fos_5fpages',['mi_option_reserve_huge_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2',1,'mimalloc-doc.h']]], ['mi_5foption_5freset_5fdecommits',['mi_option_reset_decommits',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536',1,'mimalloc-doc.h']]], + ['mi_5foption_5freset_5fdelay',['mi_option_reset_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5fcache',['mi_option_segment_cache',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], + ['mi_5foption_5fuse_5fnuma_5fnodes',['mi_option_use_numa_nodes',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74',1,'mimalloc-doc.h']]], ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]] ]; diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index c5eeb540..d1d209a1 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -81,20 +81,21 @@ var searchData= ['mi_5frecalloc',['mi_recalloc',['../group__malloc.html#ga23a0fbb452b5dce8e31fab1a1958cacc',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], 
['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], - ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2',1,'mimalloc-doc.h']]], - ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5',1,'mimalloc-doc.h']]], - ['mi_5freserve_5fhuge_5fos_5fpages',['mi_reserve_huge_os_pages',['../group__extended.html#ga2664f36a2dd557741c429cb799f04641',1,'mimalloc-doc.h']]], + ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], ['mi_5frezalloc',['mi_rezalloc',['../group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], ['mi_5fthread_5finit',['mi_thread_init',['../group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17',1,'mimalloc-doc.h']]], - ['mi_5fthread_5fstats_5fprint',['mi_thread_stats_print',['../group__extended.html#ga489670a15d1a257ab4639e645ee4612a',1,'mimalloc-doc.h']]], + ['mi_5fthread_5fstats_5fprint_5fout',['mi_thread_stats_print_out',['../group__extended.html#gab1dac8476c46cb9eecab767eb40c1525',1,'mimalloc-doc.h']]], ['mi_5fusable_5fsize',['mi_usable_size',['../group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee',1,'mimalloc-doc.h']]], ['mi_5fvalloc',['mi_valloc',['../group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b',1,'mimalloc-doc.h']]], ['mi_5fzalloc',['mi_zalloc',['../group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000',1,'mimalloc-doc.h']]], diff --git a/docs/search/functions_1.html b/docs/search/functions_1.html new file mode 100644 index 00000000..bfcf880b --- /dev/null +++ b/docs/search/functions_1.html @@ -0,0 +1,30 
@@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/functions_1.js b/docs/search/functions_1.js new file mode 100644 index 00000000..06dbb19b --- /dev/null +++ b/docs/search/functions_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['void',['void',['../group__extended.html#gadc49452cc1634aa03ac83ffe9b97a19c',1,'mimalloc-doc.h']]] +]; diff --git a/docs/search/groups_7.html b/docs/search/groups_7.html new file mode 100644 index 00000000..6a24e7cf --- /dev/null +++ b/docs/search/groups_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/groups_7.js b/docs/search/groups_7.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/groups_7.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/pages_4.html b/docs/search/pages_4.html new file mode 100644 index 00000000..021d277a --- /dev/null +++ b/docs/search/pages_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/pages_4.js b/docs/search/pages_4.js new file mode 100644 index 00000000..b47682a4 --- /dev/null +++ b/docs/search/pages_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['using_20the_20library',['Using the library',['../using.html',1,'']]] +]; diff --git a/docs/search/typedefs_0.js b/docs/search/typedefs_0.js index c6f0f7ec..17816828 100644 --- a/docs/search/typedefs_0.js +++ b/docs/search/typedefs_0.js @@ -1,7 +1,7 @@ var searchData= [ ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], - ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga22213691c3ce5ab4d91b24aff1023529',1,'mimalloc-doc.h']]], + ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]], - ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f',1,'mimalloc-doc.h']]] + ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]] ]; diff --git a/docs/search/typedefs_1.html b/docs/search/typedefs_1.html new file mode 100644 index 00000000..c8a02685 --- /dev/null +++ b/docs/search/typedefs_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/typedefs_1.js b/docs/search/typedefs_1.js new file mode 100644 index 00000000..ecccb16a --- /dev/null +++ b/docs/search/typedefs_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['heartbeat',['heartbeat',['../group__extended.html#ga411f6e94394a2400aa460c796beff8d8',1,'mimalloc-doc.h']]] +]; diff --git a/docs/search/typedefs_2.html b/docs/search/typedefs_2.html new file mode 100644 index 00000000..86a91955 --- /dev/null +++ b/docs/search/typedefs_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/typedefs_2.js b/docs/search/typedefs_2.js new file mode 100644 index 00000000..2af06079 --- /dev/null +++ b/docs/search/typedefs_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], + ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]] +]; diff --git a/docs/using.html b/docs/using.html index 9b7305b0..eae37a5e 100644 --- a/docs/using.html +++ b/docs/using.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -102,7 +102,11 @@ $(document).ready(function(){initNavTree('using.html','');});
Using the library
-

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

+

Build

+

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.
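For illustration, a minimal program that uses the mi_ prefixed API directly could look as follows (a sketch that relies only on mimalloc.h and the functions documented here):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  char* buf = (char*)mi_malloc(64);      // allocate through the mi_ prefixed API
  if (buf == NULL) return 1;
  snprintf(buf, 64, "hello, mimalloc %d", mi_version());
  puts(buf);
  mi_free(buf);                          // and free through it as well
  return 0;
}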

+

C++

+

 For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.

+

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_SHOW_STATS=1) (in the debug version):

> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats: peak total freed unit
normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
normal 6: 48 b 48 b 48 b 48 b ok
normal 17: 960 b 960 b 960 b 320 b ok
heap stats: peak total freed unit
normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
huge: 0 b 0 b 0 b 1 b ok
total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
malloc requested: 32.8 mb
committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
reset: 0 b 0 b 0 b 1 b ok
segments: 1 1 1
-abandoned: 0
pages: 6 6 6
-abandoned: 0
mmaps: 3
mmap fast: 0
mmap slow: 1
threads: 0
elapsed: 2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb
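Statistics can also be printed programmatically; a small sketch (it assumes only mi_stats_print as declared in this documentation, with NULL selecting the default output, typically stderr):

#include <mimalloc.h>

int main(void) {
  void* p = mi_malloc(1024);
  mi_free(p);
  mi_stats_print(NULL);   // print the main statistics to the default output
  return 0;
}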

 The above model of using the mi_ prefixed API is not always possible, though, in existing programs that already use the standard malloc interface; another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.

See Overriding Malloc for more info.

From 9453d8b4683ccc347458666bf3a10de7fa8c2638 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:39:06 -0800 Subject: [PATCH 125/179] update documentation --- doc/doxyfile | 4 ++-- doc/mimalloc-doc.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/doxyfile b/doc/doxyfile index 11d71667..91adbeb8 100644 --- a/doc/doxyfile +++ b/doc/doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = mi-malloc # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.0 +PROJECT_NUMBER = 1.4 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a @@ -1235,7 +1235,7 @@ HTML_EXTRA_STYLESHEET = mimalloc-doxygen.css # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. -HTML_EXTRA_FILES = +HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 71cc1589..ea526b12 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -784,6 +784,9 @@ void mi_free_aligned(void* p, size_t alignment); /// raise `std::bad_alloc` exception on failure. void* mi_new(std::size_t n) noexcept(false); +/// raise `std::bad_alloc` exception on failure or overflow. +void* mi_new_n(size_t count, size_t size) noexcept(false); + /// raise `std::bad_alloc` exception on failure. void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false); From f4ee1760b8f58bdb4967e47f34c2495387c2cff2 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:39:41 -0800 Subject: [PATCH 126/179] Suppress C source compiled as C++ warning on clang --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a894de9b..8f05c883 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,10 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) + if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") + list(APPEND mi_cflags -Wno-deprecated) + endif() endif() # Compiler flags From 6dd636d82db6516f325c1c3b2695e20a024230ce Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:41:52 -0800 Subject: [PATCH 127/179] improve STL allocator using mi_new_n and removing unused parameter names; follow up from pr #193 and #188 --- include/mimalloc.h | 31 +++++++++++++++++------------- src/alloc.c | 47 ++++++++++++++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 59f394a7..485978e6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -326,10 +326,11 @@ mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; -mi_decl_export void* mi_new(size_t n) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned(size_t n, size_t alignment) 
mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_nothrow(size_t n) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); #ifdef __cplusplus } @@ -347,21 +348,25 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_ template struct mi_stl_allocator { typedef T value_type; -#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; -#endif - mi_stl_allocator() mi_attr_noexcept {} - mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } - template mi_stl_allocator(const mi_stl_allocator& other) mi_attr_noexcept { (void)other; } - T* allocate(size_t n, const void* hint = 0) { (void)hint; return (T*)mi_mallocn(n, sizeof(T)); } - void deallocate(T* p, size_t n) { mi_free_size(p,n); } + #endif + mi_stl_allocator() mi_attr_noexcept { } + mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } + template mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } + void deallocate(T* p, size_t size) { mi_free_size(p, size); } + #if (__cplusplus >= 201703L) // C++17 + T* allocate(size_t count) { return (T*)mi_new_n(count, sizeof(T)); } + #else + T* allocate(size_t count, const void* hint = 0) { (void)hint; return (T*)mi_new_n(count, sizeof(T)); } + #endif }; -template bool operator==(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return true; } -template bool operator!=(const mi_stl_allocator& lhs, const mi_stl_allocator& rhs) mi_attr_noexcept { (void)lhs; (void)rhs; return false; } +template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } +template bool operator!=(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return false; } #endif // __cplusplus #endif diff --git a/src/alloc.c b/src/alloc.c index d66c629b..37d43d9f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -678,36 +678,51 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_try_new(size_t n, bool nothrow ) { +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { - p = mi_malloc(n); + p = mi_malloc(size); } return p; } -void* mi_new(size_t n) { - void* p = mi_malloc(n); - if (mi_unlikely(p == NULL)) return mi_try_new(n,false); +void* mi_new(size_t size) { + void* p = mi_malloc(size); + if (mi_unlikely(p == NULL)) return mi_try_new(size,false); return p; } -void* mi_new_aligned(size_t n, size_t alignment) { +void* mi_new_nothrow(size_t size) { + void* p = mi_malloc(size); + if (mi_unlikely(p == NULL)) return mi_try_new(size, true); + return p; +} + +void* 
mi_new_aligned(size_t size, size_t alignment) { void* p; - do { p = mi_malloc_aligned(n, alignment); } + do { + p = mi_malloc_aligned(size, alignment); + } while(p == NULL && mi_try_new_handler(false)); return p; } -void* mi_new_nothrow(size_t n) { - void* p = mi_malloc(n); - if (mi_unlikely(p == NULL)) return mi_try_new(n,true); +void* mi_new_aligned_nothrow(size_t size, size_t alignment) { + void* p; + do { + p = mi_malloc_aligned(size, alignment); + } + while(p == NULL && mi_try_new_handler(true)); return p; } -void* mi_new_aligned_nothrow(size_t n, size_t alignment) { - void* p; - do { p = mi_malloc_aligned(n, alignment); } - while (p == NULL && mi_try_new_handler(true)); - return p; -} +void* mi_new_n(size_t count, size_t size) { + size_t total; + if (mi_unlikely(mi_mul_overflow(count, size, &total))) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new(total); + } +} \ No newline at end of file From dbe721de393cbfce7a699cc6f1b5cf5955a85f7a Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:45:12 -0800 Subject: [PATCH 128/179] dont compile test-stress.c as C++ code (or we get atomic compilation errors) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f05c883..27729584 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wno-deprecated) endif() From 3e982a3813f191b32fee60b06fba758a98f3a633 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 15:58:43 -0800 Subject: [PATCH 129/179] fix STL deallocate passing count (instead of size) to mi_free_size --- include/mimalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 485978e6..67ff1a35 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -357,7 +357,7 @@ template struct mi_stl_allocator { mi_stl_allocator() mi_attr_noexcept { } mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } template mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } - void deallocate(T* p, size_t size) { mi_free_size(p, size); } + void deallocate(T* p, size_t /* count */) { mi_free(p); } #if (__cplusplus >= 201703L) // C++17 T* allocate(size_t count) { return (T*)mi_new_n(count, sizeof(T)); } #else From dc5838896837cc4bd6bc1583f390fa51e389ae48 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 17 Jan 2020 19:59:55 -0800 Subject: [PATCH 130/179] Add ability to register custom error function called on various error conditions; including ENOMEM --- doc/mimalloc-doc.h | 24 +++++++++ docs/group__extended.html | 74 ++++++++++++++++++++++++++ docs/group__extended.js | 2 + docs/group__posix.html | 41 +++++++++++++++ docs/group__posix.js | 1 + docs/mimalloc-doc_8h_source.html | 55 ++++++++++---------- docs/navtreeindex0.js | 51 +++++++++--------- docs/search/all_6.js | 3 ++ docs/search/functions_0.js | 2 + docs/search/typedefs_0.js | 1 + include/mimalloc-internal.h | 89 +++++++++++++++++++++----------- include/mimalloc.h | 15 ++++-- 
src/alloc-aligned.c | 6 +-- src/alloc.c | 27 ++++------ src/arena.c | 6 +-- src/init.c | 2 +- src/options.c | 44 ++++++++++++---- src/os.c | 10 ++-- src/page.c | 10 ++-- test/test-api.c | 6 ++- 20 files changed, 342 insertions(+), 127 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index ea526b12..ca744e4c 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -373,6 +373,30 @@ typedef void (mi_output_fun)(const char* msg, void* arg); /// like verbose or warning messages. void mi_register_output(mi_output_fun* out, void* arg); +/// Type of error callback functions. +/// @param err Error code (see mi_register_error() for a complete list). +/// @param arg Argument that was passed at registration to hold extra state. +/// +/// @see mi_register_error() +typedef void (mi_error_fun)(int err, void* arg); + +/// Register an error callback function. +/// @param errfun The error function that is called on an error (use \a NULL for default) +/// @param arg Extra argument that will be passed on to the error function. +/// +/// The \a errfun function is called on an error in mimalloc after emitting +/// an error message (through the output function). It as always legal to just +/// return from the \a errfun function in which case allocation functions generally +/// return \a NULL or ignore the condition. The default function only calls abort() +/// when compiled in secure mode with an \a EFAULT error. The possible error +/// codes are: +/// * \a EAGAIN: Double free was detected (only in debug and secure mode). +/// * \a EFAULT: Corrupted free list or meta-data was detected (only in debug and secure mode). +/// * \a ENOMEM: Not enough memory available to satisfy the request. +/// * \a EOVERFLOW: Too large a request, for example in mi_calloc(), the \a count and \a size parameters are too large. +/// * \a EINVAL: Trying to free or re-allocate an invalid pointer. +void mi_register_error(mi_error_fun* errfun, void* arg); + /// Is a pointer part of our heap? /// @param p The pointer to check. /// @returns \a true if this is a pointer into our heap. diff --git a/docs/group__extended.html b/docs/group__extended.html index 85ea3624..9e2a2efc 100644 --- a/docs/group__extended.html +++ b/docs/group__extended.html @@ -124,6 +124,9 @@ Typedefs typedef void() mi_output_fun(const char *msg, void *arg)  Type of output functions. More...
[docs/group__extended.html: regenerated Doxygen page. The diff adds the mi_error_fun typedef and the mi_register_error() function to the summary tables and detailed member documentation, repeating the parameter descriptions and the EAGAIN/EFAULT/ENOMEM/EOVERFLOW/EINVAL error-code list from doc/mimalloc-doc.h above. Rendered HTML fragments omitted.]
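For reference, a minimal usage sketch of the new error-callback registration. It assumes the public header installs as <mimalloc.h>; the handler name and its choice to abort only on EFAULT are illustrative, not part of the patch:

    #include <mimalloc.h>
    #include <cerrno>
    #include <cstdio>
    #include <cstdlib>

    // Invoked by mimalloc after it has already printed its own error message.
    static void my_error_handler(int err, void* arg) {
      (void)arg;
      std::fprintf(stderr, "mimalloc reported error %d\n", err);
      if (err == EFAULT) std::abort();   // treat detected heap corruption as fatal
    }

    int main() {
      mi_register_error(&my_error_handler, nullptr);
      // ... application code; ENOMEM, EOVERFLOW, EINVAL and EAGAIN reports now reach the handler ...
      return 0;
    }

Returning normally from the handler is always allowed; the failing allocation then returns NULL (or the condition is ignored), matching the default behaviour documented above.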
diff --git a/docs/group__extended.js b/docs/group__extended.js index 7152b518..ff8891b2 100644 --- a/docs/group__extended.js +++ b/docs/group__extended.js @@ -2,6 +2,7 @@ var group__extended = [ [ "MI_SMALL_SIZE_MAX", "group__extended.html#ga1ea64283508718d9d645c38efc2f4305", null ], [ "mi_deferred_free_fun", "group__extended.html#ga299dae78d25ce112e384a98b7309c5be", null ], + [ "mi_error_fun", "group__extended.html#ga251d369cda3f1c2a955c555486ed90e5", null ], [ "mi_output_fun", "group__extended.html#gad823d23444a4b77a40f66bf075a98a0c", null ], [ "mi_collect", "group__extended.html#ga421430e2226d7d468529cec457396756", null ], [ "mi_good_size", "group__extended.html#gac057927cd06c854b45fe7847e921bd47", null ], @@ -9,6 +10,7 @@ var group__extended = [ "mi_is_redirected", "group__extended.html#gaad25050b19f30cd79397b227e0157a3f", null ], [ "mi_malloc_small", "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99", null ], [ "mi_register_deferred_free", "group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece", null ], + [ "mi_register_error", "group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45", null ], [ "mi_register_output", "group__extended.html#gae5b17ff027cd2150b43a33040250cf3f", null ], [ "mi_reserve_huge_os_pages_at", "group__extended.html#ga7795a13d20087447281858d2c771cca1", null ], [ "mi_reserve_huge_os_pages_interleave", "group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50", null ], diff --git a/docs/group__posix.html b/docs/group__posix.html index 65e8ff7e..eaa4a10f 100644 --- a/docs/group__posix.html +++ b/docs/group__posix.html @@ -140,6 +140,9 @@ Functions void * mi_new (std::size_t n) noexcept(false)  raise std::bad_alloc exception on failure. More...
[docs/group__posix.html: regenerated Doxygen page. The diff adds mi_new_n(size_t count, size_t size) noexcept(false) to the function table together with a detailed entry describing it as raising std::bad_alloc on failure or overflow. Rendered HTML fragments omitted.]
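To illustrate the overflow behaviour documented for mi_new_n, a small sketch of the allocator path, assuming mimalloc is built as C++ (MI_USE_CXX) so std::bad_alloc can be thrown and that the header installs as <mimalloc.h>; the specific counts are illustrative only:

    #include <mimalloc.h>
    #include <cstdint>
    #include <iostream>
    #include <new>

    int main() {
      mi_stl_allocator<int> a;
      int* p = a.allocate(1000);      // forwards to mi_new_n(1000, sizeof(int))
      a.deallocate(p, 1000);          // forwards to mi_free(p) after the fix below
      try {
        (void)a.allocate(SIZE_MAX);   // count * sizeof(int) overflows size_t
      } catch (const std::bad_alloc&) {
        std::cout << "oversized request rejected\n";
      }
      return 0;
    }

On overflow, mi_new_n invokes the new-handler machinery once (which throws std::bad_alloc when no custom handler is installed and mimalloc is compiled as C++) and otherwise returns NULL, rather than allocating a silently wrapped-around size.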
diff --git a/docs/group__posix.js b/docs/group__posix.js index 5584092b..0f2b895d 100644 --- a/docs/group__posix.js +++ b/docs/group__posix.js @@ -12,6 +12,7 @@ var group__posix = [ "mi_new", "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], [ "mi_new_aligned", "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], [ "mi_new_aligned_nothrow", "group__posix.html#gab5e29558926d934c3f1cae8c815f942c", null ], + [ "mi_new_n", "group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], [ "mi_new_nothrow", "group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], [ "mi_posix_memalign", "group__posix.html#gacff84f226ba9feb2031b8992e5579447", null ], [ "mi_pvalloc", "group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e", null ], diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index c240f151..12d0f799 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -102,7 +102,7 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
[docs/mimalloc-doc_8h_source.html: regenerated rendered source listing of mimalloc-doc.h. The listing gains the mi_error_fun typedef, the mi_register_error() declaration and mi_new_n(), and every later line reference in the generated cross-reference tooltips shifts accordingly. Rendered HTML fragments omitted.]
diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index d1b0e072..e2667728 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -29,27 +29,29 @@ var NAVTREEINDEX0 = "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1], "group__analysis.html#structmi__heap__area__t":[5,6,0], "group__extended.html":[5,1], -"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,19], -"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,16], +"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,21], +"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,18], "group__extended.html#ga1ea64283508718d9d645c38efc2f4305":[5,1,0], -"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,20], -"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,14], +"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,22], +"group__extended.html#ga251d369cda3f1c2a955c555486ed90e5":[5,1,2], +"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,16], "group__extended.html#ga299dae78d25ce112e384a98b7309c5be":[5,1,1], -"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,13], -"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,11], -"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,8], -"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,15], -"group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,3], -"group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,5], -"group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,7], -"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,10], -"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,12], -"group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,6], -"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,18], -"group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,4], -"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,2], -"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,9], -"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,17], +"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,15], +"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,13], +"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,9], +"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,17], +"group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,4], +"group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,6], +"group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,8], +"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,12], +"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,14], +"group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45":[5,1,10], +"group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,7], +"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,20], +"group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,5], +"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,3], +"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,11], +"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,19], "group__heap.html":[5,3], "group__heap.html#ga00e95ba1e01acac3cfd95bb7a357a6f0":[5,3,20], "group__heap.html#ga08ca6419a5c057a4d965868998eef487":[5,3,3], @@ -121,18 +123,19 @@ var NAVTREEINDEX0 = "group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9":[5,8,3], "group__posix.html#ga1326d2e4388630b5f81ca7206318b8e5":[5,8,1], 
"group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de":[5,8,6], -"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,15], +"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,16], "group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7":[5,8,2], "group__posix.html#ga72e9d7ffb5fe94d69bc722c8506e27bc":[5,8,5], -"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,16], +"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,17], "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e":[5,8,8], "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,8,9], "group__posix.html#gab5e29558926d934c3f1cae8c815f942c":[5,8,11], -"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,13], +"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,14], "group__posix.html#gad5a69c8fea96aa2b7a7c818c2130090a":[5,8,0], "group__posix.html#gae01389eedab8d67341ff52e2aad80ebb":[5,8,4], -"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,12], -"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,14], +"group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,8,12], +"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,13], +"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,15], "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,8,10], "group__typed.html":[5,5], "group__typed.html#ga0619a62c5fd886f1016030abe91f0557":[5,5,7], diff --git a/docs/search/all_6.js b/docs/search/all_6.js index cc7a26ec..7af11c0f 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -11,6 +11,7 @@ var searchData= ['mi_5fcheck_5fowned',['mi_check_owned',['../group__analysis.html#ga628c237489c2679af84a4d0d143b3dd5',1,'mimalloc-doc.h']]], ['mi_5fcollect',['mi_collect',['../group__extended.html#ga421430e2226d7d468529cec457396756',1,'mimalloc-doc.h']]], ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], + ['mi_5ferror_5ffun',['mi_error_fun',['../group__extended.html#ga251d369cda3f1c2a955c555486ed90e5',1,'mimalloc-doc.h']]], ['mi_5fexpand',['mi_expand',['../group__malloc.html#gaaee66a1d483c3e28f585525fb96707e4',1,'mimalloc-doc.h']]], ['mi_5ffree',['mi_free',['../group__malloc.html#gaf2c7b89c327d1f60f59e68b9ea644d95',1,'mimalloc-doc.h']]], ['mi_5ffree_5faligned',['mi_free_aligned',['../group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9',1,'mimalloc-doc.h']]], @@ -75,6 +76,7 @@ var searchData= ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], @@ -113,6 +115,7 @@ var searchData= 
['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5ferror',['mi_register_error',['../group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45',1,'mimalloc-doc.h']]], ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index d1d209a1..098041bb 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -62,6 +62,7 @@ var searchData= ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], @@ -82,6 +83,7 @@ var searchData= ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5ferror',['mi_register_error',['../group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45',1,'mimalloc-doc.h']]], ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], diff --git a/docs/search/typedefs_0.js b/docs/search/typedefs_0.js index 17816828..44a0a6c6 100644 --- a/docs/search/typedefs_0.js +++ b/docs/search/typedefs_0.js @@ -2,6 +2,7 @@ var searchData= [ ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], 
['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], + ['mi_5ferror_5ffun',['mi_error_fun',['../group__extended.html#ga251d369cda3f1c2a955c555486ed90e5',1,'mimalloc-doc.h']]], ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]], ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]] ]; diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index f039fc50..eaa327be 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -23,25 +23,21 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // constant conditional due to MI_SECURE paths #define mi_decl_noinline __declspec(noinline) -#define mi_attr_noreturn #elif defined(__GNUC__) || defined(__clang__) #define mi_decl_noinline __attribute__((noinline)) -#define mi_attr_noreturn __attribute__((noreturn)) #else #define mi_decl_noinline -#define mi_attr_noreturn #endif // "options.c" void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); -void _mi_error_message(const char* fmt, ...); void _mi_warning_message(const char* fmt, ...); void _mi_verbose_message(const char* fmt, ...); void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); -void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; +void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); @@ -146,6 +142,29 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +/* ----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in secure mode. 
+ For portability define undefined error codes using common Unix codes: + +----------------------------------------------------------- */ +#include +#ifndef EAGAIN // double free +#define EAGAIN (11) +#endif +#ifndef ENOMEM // out of memory +#define ENOMEM (12) +#endif +#ifndef EFAULT // corrupted free-list or meta-data +#define EFAULT (14) +#endif +#ifndef EINVAL // trying to free an invalid pointer +#define EINVAL (22) +#endif +#ifndef EOVERFLOW // count*size overflow +#define EOVERFLOW (75) +#endif + /* ----------------------------------------------------------- Inlined definitions @@ -166,30 +185,6 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) -// Overflow detecting multiply -static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - // quick check for the case where count is one (common for C++ allocators) - if (count==1) { - *total = size; - return false; - } -#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 -#include // UINT_MAX, ULONG_MAX -#if (SIZE_MAX == UINT_MAX) - return __builtin_umul_overflow(count, size, total); -#elif (SIZE_MAX == ULONG_MAX) - return __builtin_umull_overflow(count, size, total); -#else - return __builtin_umulll_overflow(count, size, total); -#endif -#else /* __builtin_umul_overflow is unavailable */ - #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) - *total = count * size; - return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) - && size > 0 && (SIZE_MAX / size) < count); -#endif -} - // Is `x` a power of two? (0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); @@ -229,6 +224,40 @@ static inline size_t _mi_wsize_from_size(size_t size) { } +// Overflow detecting multiply +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { +#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#include // UINT_MAX, ULONG_MAX +#if (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, total); +#elif (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, total); +#else + return __builtin_umulll_overflow(count, size, total); +#endif +#else /* __builtin_umul_overflow is unavailable */ + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + *total = count * size; + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) + && size > 0 && (SIZE_MAX / size) < count); +#endif +} + +// Safe multiply `count*size` into `total`; return `true` on overflow. +static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { + if (count==1) { // quick check for the case where count is one (common for C++ allocators) + *total = size; + return false; + } + else if (mi_unlikely(mi_mul_overflow(count, size, total))) { + _mi_error_message(EOVERFLOW, "allocation request too large (%zu * %zu bytes)\n", count, size); + *total = SIZE_MAX; + return true; + } + else return false; +} + + /* ----------------------------------------------------------- The thread local default heap ----------------------------------------------------------- */ @@ -506,7 +535,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { - _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; diff --git a/include/mimalloc.h b/include/mimalloc.h index 67ff1a35..1250314c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -104,16 +104,23 @@ mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_decl_export mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); - mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; + +// ------------------------------------------------------ +// Internals +// ------------------------------------------------------ + typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; +typedef void (mi_cdecl mi_error_fun)(int err, void* arg); +mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); + mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; @@ -143,9 +150,9 @@ mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsiz mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -// ------------------------------------------------------ -// Heaps -// ------------------------------------------------------ +// ------------------------------------------------------------------------------------- +// Heaps: first-class, but can only allocate from the same thread that created it. 
+// ------------------------------------------------------------------------------------- struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 5a59a63a..55b0e041 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -79,7 +79,7 @@ mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, siz mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } @@ -168,13 +168,13 @@ mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_ mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(newcount, size, &total)) return NULL; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(newcount, size, &total)) return NULL; + if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } diff --git a/src/alloc.c b/src/alloc.c index 37d43d9f..e605c017 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -146,7 +146,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con mi_list_contains(page, page->local_free, block) || mi_list_contains(page, mi_page_thread_free(page), block)) { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; @@ -300,7 +300,7 @@ void mi_free(void* p) mi_attr_noexcept { #if (MI_DEBUG>0) if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { - _mi_error_message("trying to free an invalid (unaligned) pointer: %p\n", p); + _mi_error_message(EINVAL, "trying to free an invalid (unaligned) pointer: %p\n", p); return; } #endif @@ -310,16 +310,16 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_DEBUG!=0) if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n" + _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: %p\n" "(this may still be a valid very large allocation (over 64MiB))\n", p); if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { - _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); } } #endif #if (MI_DEBUG!=0 || MI_SECURE>=4) if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { - _mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p); + _mi_error_message(EINVAL, "trying to free a pointer that does not point to a valid heap space: %p\n", p); return; } #endif @@ -432,7 +432,7 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { extern inline mi_decl_allocator void* 
mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count,size,&total)) return NULL; + if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } @@ -443,7 +443,7 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { // Uninitialized `calloc` extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } @@ -484,7 +484,7 @@ mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); } @@ -502,7 +502,7 @@ mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsiz mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; - if (mi_mul_overflow(count, size, &total)) return NULL; + if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } @@ -570,7 +570,6 @@ char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #define PATH_MAX MAX_PATH #endif #include -#include char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; @@ -645,10 +644,6 @@ static bool mi_try_new_handler(bool nothrow) { } } #else -#include -#ifndef ENOMEM -#define ENOMEM 12 -#endif typedef void (*std_new_handler_t)(); #if (defined(__GNUC__) || defined(__clang__)) @@ -668,7 +663,7 @@ std_new_handler_t mi_get_new_handler() { static bool mi_try_new_handler(bool nothrow) { std_new_handler_t h = mi_get_new_handler(); if (h==NULL) { - if (!nothrow) exit(ENOMEM); + if (!nothrow) exit(ENOMEM); // cannot throw in plain C, use exit as we are out of memory anyway. 
return false; } else { @@ -718,7 +713,7 @@ void* mi_new_aligned_nothrow(size_t size, size_t alignment) { void* mi_new_n(size_t count, size_t size) { size_t total; - if (mi_unlikely(mi_mul_overflow(count, size, &total))) { + if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } diff --git a/src/arena.c b/src/arena.c index 7f1a1caf..f20a03e9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -229,18 +229,18 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); mi_assert_internal(arena != NULL); if (arena == NULL) { - _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { - _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } const size_t blocks = mi_block_count_of_size(size); bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!ones) { - _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size); + _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); return; }; } diff --git a/src/init.c b/src/init.c index b8422c2f..18a18f60 100644 --- a/src/init.c +++ b/src/init.c @@ -157,7 +157,7 @@ static bool _mi_heap_init(void) { // use `_mi_os_alloc` to allocate directly from the OS mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? if (td == NULL) { - _mi_error_message("failed to allocate thread local heap memory\n"); + _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); return false; } mi_tld_t* tld = &td->tld; diff --git a/src/options.c b/src/options.c index c12c77e0..b06cbdb4 100644 --- a/src/options.c +++ b/src/options.c @@ -287,14 +287,10 @@ void _mi_verbose_message(const char* fmt, ...) { va_end(args); } -void _mi_error_message(const char* fmt, ...) { +static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; if (mi_atomic_increment(&error_count) > mi_max_error_count) return; - va_list args; - va_start(args,fmt); - mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); - va_end(args); - mi_assert(false); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { @@ -314,14 +310,40 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co } #endif -mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) 
{ +// -------------------------------------------------------- +// Errors +// -------------------------------------------------------- + +static mi_error_fun* volatile mi_error_handler; // = NULL +static volatile _Atomic(void*) mi_error_arg; // = NULL + +static void mi_error_default(int err) { + UNUSED(err); +#if (MI_SECURE>0) + if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) + abort(); + } +#endif +} + +void mi_register_error(mi_error_fun* fun, void* arg) { + mi_error_handler = fun; // can be NULL + mi_atomic_write_ptr(&mi_error_arg, arg); +} + +void _mi_error_message(int err, const char* fmt, ...) { + // show detailed error message va_list args; va_start(args, fmt); - mi_vfprintf(NULL, NULL, "mimalloc: fatal: ", fmt, args); + mi_show_error_message(fmt, args); va_end(args); - #if (MI_SECURE>=0) - abort(); - #endif + // and call the error handler which may abort (or return normally) + if (mi_error_handler != NULL) { + mi_error_handler(err, mi_atomic_read_ptr(&mi_error_arg)); + } + else { + mi_error_default(err); + } } // -------------------------------------------------------- diff --git a/src/os.c b/src/os.c index b5bd0ad9..be507b69 100644 --- a/src/os.c +++ b/src/os.c @@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-atomic.h" #include // strerror -#include + #if defined(_WIN32) #include @@ -655,7 +655,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ if (err != 0) { err = errno; } #endif if (err != 0) { - _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); mi_mprotect_hint(err); } mi_assert_internal(err == 0); @@ -719,7 +719,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int err = madvise(start, csize, MADV_DONTNEED); #endif if (err != 0) { - _mi_warning_message("madvise reset error: start: 0x%p, csize: 0x%x, errno: %i\n", start, csize, errno); + _mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno); } //mi_assert(err == 0); if (err != 0) return false; @@ -774,7 +774,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { if (err != 0) { err = errno; } #endif if (err != 0) { - _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err); mi_mprotect_hint(err); } return (err == 0); @@ -961,7 +961,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr); + _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); } break; diff --git a/src/page.c b/src/page.c index 84baf306..d67a44de 100644 --- a/src/page.c +++ b/src/page.c @@ -175,7 +175,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) } // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) if (count > max_count) { - _mi_fatal_error("corrupted thread-free list\n"); + _mi_error_message(EFAULT, "corrupted thread-free list\n"); return; // the 
thread-free items cannot be freed } @@ -796,7 +796,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_page_t* page; if (mi_unlikely(size > MI_LARGE_OBJ_SIZE_MAX)) { if (mi_unlikely(size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) - page = NULL; + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu b requested)\n", size); + return NULL; } else { page = mi_huge_page_alloc(heap,size); @@ -806,7 +807,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // otherwise find a page with free blocks in our size segregated queues page = mi_find_free_page(heap,size); } - if (page == NULL) return NULL; // out of memory + if (mi_unlikely(page == NULL)) { // out of memory + _mi_error_message(ENOMEM, "cannot allocate memory (%zu bytes requested)\n", size); + return NULL; + } mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_block_size(page) >= size); diff --git a/test/test-api.c b/test/test-api.c index 060efc44..68df314e 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file Testing allocators is difficult as bugs may only surface after particular allocation patterns. The main approach to testing _mimalloc_ is therefore to have extensive internal invariant checking (see `page_is_valid` in `page.c` -for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`. +for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`. The main testing is then to run `mimalloc-bench` [1] using full invariant checking to catch any potential problems over a wide range of intensive allocation bench marks. @@ -88,6 +88,10 @@ int main() { CHECK_BODY("malloc-null",{ mi_free(NULL); }); + CHECK_BODY("calloc-overflow",{ + // use (size_t)&mi_calloc to get some number without triggering compiler warnings + result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); + }); // --------------------------------------------------- // Extended From 41e717c2e0bdb4e6c5cbd1fd8a0bae3b0afb46d2 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 18 Jan 2020 20:30:12 -0800 Subject: [PATCH 131/179] fix assertion in mi_block_zero_init (issue #194) --- src/alloc.c | 4 ++-- test/test-api.c | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index e605c017..8f98b647 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -92,9 +92,9 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept { void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // note: we need to initialize the whole block to zero, not just size // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - UNUSED(size); + UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(size > 0 && mi_page_block_size(page) >= size); + mi_assert_internal(size >= 0 && mi_page_block_size(page) >= size); mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? 
diff --git a/test/test-api.c b/test/test-api.c index 68df314e..d7a7be59 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -92,6 +92,9 @@ int main() { // use (size_t)&mi_calloc to get some number without triggering compiler warnings result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); }); + CHECK_BODY("calloc0",{ + result = (mi_usable_size(mi_calloc(0,1000)) >= 0); + }); // --------------------------------------------------- // Extended From e8d7c80c74e7bcc789e2d2dd74e0306f5a0d9b8e Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 17:33:36 -0800 Subject: [PATCH 132/179] fix build warnings on linux --- src/alloc.c | 18 +++++++++--------- test/test-api.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 8f98b647..7fc9023c 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -94,7 +94,7 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED_RELEASE(size); mi_assert_internal(p != NULL); - mi_assert_internal(size >= 0 && mi_page_block_size(page) >= size); + mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? @@ -141,7 +141,7 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || mi_list_contains(page, mi_page_thread_free(page), block)) @@ -343,8 +343,8 @@ void mi_free(void* p) mi_attr_noexcept mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { - _mi_page_retire(page); + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); } } else { @@ -695,8 +695,8 @@ void* mi_new_nothrow(size_t size) { void* mi_new_aligned(size_t size, size_t alignment) { void* p; - do { - p = mi_malloc_aligned(size, alignment); + do { + p = mi_malloc_aligned(size, alignment); } while(p == NULL && mi_try_new_handler(false)); return p; @@ -704,8 +704,8 @@ void* mi_new_aligned(size_t size, size_t alignment) { void* mi_new_aligned_nothrow(size_t size, size_t alignment) { void* p; - do { - p = mi_malloc_aligned(size, alignment); + do { + p = mi_malloc_aligned(size, alignment); } while(p == NULL && mi_try_new_handler(true)); return p; @@ -720,4 +720,4 @@ void* mi_new_n(size_t count, size_t size) { else { return mi_new(total); } -} \ No newline at end of file +} diff --git a/test/test-api.c b/test/test-api.c index d7a7be59..a837946f 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -93,7 +93,7 @@ int main() { result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL); }); CHECK_BODY("calloc0",{ - result = (mi_usable_size(mi_calloc(0,1000)) >= 0); + result = (mi_usable_size(mi_calloc(0,1000)) <= 16); }); // --------------------------------------------------- From 9d7ac76d93c8995631bb7ed264406a12aa2564d2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 18:35:45 -0800 Subject: [PATCH 133/179] fix compilation under Intel C compiler (icc) --- CMakeLists.txt | 23 ++++++++++++++++------- include/mimalloc-atomic.h 
| 6 +++--- test/test-stress.c | 9 ++++++++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 27729584..366ffc44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,7 +54,7 @@ endif() # Process options # ----------------------------------------------------------------------------- -if(CMAKE_C_COMPILER_ID MATCHES "MSVC") +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() @@ -96,25 +96,34 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress PROPERTIES LANGUAGE CXX ) if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang") list(APPEND mi_cflags -Wno-deprecated) endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + list(APPEND mi_cflags -Kc++) + endif() endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) + if(CMAKE_C_COMPILER_ID MATCHES "GNU") + list(APPEND mi_cflags -Wno-invalid-memory-model) + list(APPEND mi_cflags -fvisibility=hidden) + endif() +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "Intel") + list(APPEND mi_cflags -Wall -fvisibility=hidden) +endif() + +if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel") if(MI_LOCAL_DYNAMIC_TLS MATCHES "ON") list(APPEND mi_cflags -ftls-model=local-dynamic) else() list(APPEND mi_cflags -ftls-model=initial-exec) endif() - if(CMAKE_C_COMPILER_ID MATCHES "GNU") - list(APPEND mi_cflags -Wno-invalid-memory-model) - list(APPEND mi_cflags -fvisibility=hidden) - list(APPEND mi_cflags -fbranch-target-load-optimize) - endif() endif() # extra needed libraries diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index ecdfba0d..5d140f0c 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_ATOMIC_H // ------------------------------------------------------ -// Atomics +// Atomics // We need to be portable between C, C++, and MSVC. // ------------------------------------------------------ @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // Atomic operations specialized for mimalloc // ------------------------------------------------------ -// Atomically add a 64-bit value; returns the previous value. +// Atomically add a 64-bit value; returns the previous value. // Note: not using _Atomic(int64_t) as it is only used for statistics. static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); @@ -43,7 +43,7 @@ static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); -// Atomically compare and exchange a value; returns `true` if successful. +// Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. 
// (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); diff --git a/test/test-stress.c b/test/test-stress.c index 42628d7c..83f9b87b 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -255,7 +255,6 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { #else #include -#include static void* thread_entry(void* param) { stress((uintptr_t)param); @@ -275,8 +274,16 @@ static void run_os_threads(size_t nthreads) { custom_free(threads); } +#ifdef __cplusplus +#include +static void* atomic_exchange_ptr(volatile void** p, void* newval) { + return std::atomic_exchange_explicit((volatile std::atomic*)p, newval, std::memory_order_acquire); +} +#else +#include static void* atomic_exchange_ptr(volatile void** p, void* newval) { return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire); } +#endif #endif From 514b3152839d6a5524d64c7a00f88875c9d5ec3f Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 21:27:46 -0800 Subject: [PATCH 134/179] add max_size member to STL allocator --- include/mimalloc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/mimalloc.h b/include/mimalloc.h index 1250314c..add1c550 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -351,6 +351,7 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_at #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include // true_type +#include // PTRDIFF_MAX #endif template struct mi_stl_allocator { @@ -360,6 +361,7 @@ template struct mi_stl_allocator { using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; + size_t max_size() const noexcept { return (PTRDIFF_MAX / sizeof(value_type)); } #endif mi_stl_allocator() mi_attr_noexcept { } mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } From a33ebb8625fde438f61a5bddd0f71fa9adb7acb2 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 19 Jan 2020 22:14:35 -0800 Subject: [PATCH 135/179] add alloc_align attribute to aligned allocation functions --- include/mimalloc.h | 48 +++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index add1c550..153e11c7 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -37,32 +37,40 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_decl_allocator __declspec(restrict) #endif + #define mi_cdecl __cdecl #define mi_decl_thread __declspec(thread) #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl __cdecl + #define mi_attr_alloc_align(p) #elif defined(__GNUC__) || defined(__clang__) + #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) #define mi_decl_thread __thread #define mi_decl_export __attribute__((visibility("default"))) #define mi_decl_allocator #define mi_attr_malloc __attribute__((malloc)) - #if defined(__clang_major__) && (__clang_major__ < 4) + #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) + #elif defined(__INTEL_COMPILER) + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) #else #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_attr_alloc_align(p) __attribute__((alloc_align(p))) #endif - #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #else + #define mi_cdecl #define mi_decl_thread __thread #define mi_decl_export #define mi_decl_allocator #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl + #define mi_attr_alloc_align(p) #endif // ------------------------------------------------------ @@ -140,13 +148,13 @@ mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_ // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); @@ -178,13 +186,13 @@ mi_decl_export char* mi_heap_strdup(mi_heap_t* heap, const char* 
s) mi_attr_noex mi_decl_export char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; -mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); @@ -198,17 +206,17 @@ mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* hea mi_decl_export mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_export mi_decl_allocator void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); -mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); 
+mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); mi_decl_export mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); -mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); +mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); @@ -314,11 +322,11 @@ mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; -mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; @@ -335,9 +343,9 @@ mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1,2); -mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export 
void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); #ifdef __cplusplus } From b77be05e4001debdcdcdc27d82bebb6b04faac11 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 12:14:34 -0800 Subject: [PATCH 136/179] only collect retired at fresh page allocation --- src/page.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/page.c b/src/page.c index d67a44de..7840a590 100644 --- a/src/page.c +++ b/src/page.c @@ -234,7 +234,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); - mi_page_queue_push(heap, pq, page); + mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -408,7 +408,7 @@ void _mi_page_retire(mi_page_t* page) { if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = 4; + page->retire_expire = 16; mi_assert_internal(mi_page_all_free(page)); return; // dont't free after all } @@ -514,7 +514,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); // initialize a sequential free list @@ -678,6 +678,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { + _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); } else { @@ -686,8 +687,6 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); - // finally collect retired pages - _mi_heap_collect_retired(heap, false); return page; } From 146899af8aad7ee3b447f1d0ffe4939f8e3bcd88 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 15:27:05 -0800 Subject: [PATCH 137/179] add missing members to stl allocator (#193) --- include/mimalloc.h | 51 +++++++++++++++++++++++++++++------------- test/main-override.cpp | 16 +++++++++++++ test/test-api.c | 8 +++---- 3 files changed, 56 insertions(+), 19 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 153e11c7..97c26e18 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -357,29 +357,50 @@ mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_at // --------------------------------------------------------------------------------------------- #ifdef __cplusplus +#include // std::numeric_limits #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 -#include // true_type -#include // PTRDIFF_MAX +#include // std::true_type +#include // std::forward #endif 
template struct mi_stl_allocator { - typedef T value_type; - #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef value_type& reference; + typedef value_type const& const_reference; + typedef value_type* pointer; + typedef value_type const* const_pointer; + template struct rebind { typedef mi_stl_allocator other; }; + + mi_stl_allocator() mi_attr_noexcept { } + mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept { } + template mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept { } + mi_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + + #if (__cplusplus >= 201703L) // C++17 + T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } + T* allocate(size_type count, const void*) { return allocate(count); } + #else + pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - size_t max_size() const noexcept { return (PTRDIFF_MAX / sizeof(value_type)); } - #endif - mi_stl_allocator() mi_attr_noexcept { } - mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } - template mi_stl_allocator(const mi_stl_allocator& ) mi_attr_noexcept { } - void deallocate(T* p, size_t /* count */) { mi_free(p); } - #if (__cplusplus >= 201703L) // C++17 - T* allocate(size_t count) { return (T*)mi_new_n(count, sizeof(T)); } + using propagate_on_container_swap = std::true_type; + using is_always_equal = std::true_type; + template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } + template void destroy(U* p) mi_attr_noexcept { p->~U(); } #else - T* allocate(size_t count, const void* hint = 0) { (void)hint; return (T*)mi_new_n(count, sizeof(T)); } + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } #endif + + size_type max_size() const mi_attr_noexcept { return (std::numeric_limits::max() / sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } }; template bool operator==(const mi_stl_allocator& , const mi_stl_allocator& ) mi_attr_noexcept { return true; } diff --git a/test/main-override.cpp b/test/main-override.cpp index f7a7f1bd..d082ade3 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -6,6 +6,7 @@ #include #include +#include static void* p = malloc(8); @@ -69,3 +70,18 @@ public: static Static s = Static(); +bool test_stl_allocator1() { + std::vector> vec; + vec.push_back(1); + vec.pop_back(); + return vec.size() == 0; +} + +bool test_stl_allocator2() { + struct some_struct { int i; int j; double z; }; + + std::vector> vec; + vec.push_back(some_struct()); + vec.pop_back(); + return vec.size() == 0; +} \ No newline at end of file diff --git a/test/test-api.c b/test/test-api.c index a837946f..95891754 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -202,7 +202,7 @@ bool test_heap2() { bool test_stl_allocator1() { #ifdef __cplusplus - std::vector> vec; + std::vector > vec; vec.push_back(1); vec.pop_back(); return vec.size() == 
0; @@ -211,11 +211,11 @@ bool test_stl_allocator1() { #endif } +struct some_struct { int i; int j; double z; }; + bool test_stl_allocator2() { #ifdef __cplusplus - struct some_struct { int i; int j; double z; }; - - std::vector> vec; + std::vector > vec; vec.push_back(some_struct()); vec.pop_back(); return vec.size() == 0; From 3957b2fd28e95e9dcc787ccda320abee412ac82e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 15:41:56 -0800 Subject: [PATCH 138/179] add mi_new_realloc(n) to support C++ style reallocation that raises std::bad_alloc on out-of-memory --- include/mimalloc.h | 6 +++++- src/alloc.c | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 97c26e18..3861ad4f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -341,11 +341,15 @@ mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; +// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1,2); mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } diff --git a/src/alloc.c b/src/alloc.c index 7fc9023c..20339204 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -721,3 +721,22 @@ void* mi_new_n(size_t count, size_t size) { return mi_new(total); } } + +void* mi_new_realloc(void* p, size_t newsize) { + void* q; + do { + q = mi_realloc(p, newsize); + } while (q == NULL && mi_try_new_handler(false)); + return q; +} + +void* mi_new_reallocn(void* p, size_t newcount, size_t size) { + size_t total; + if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_new_realloc(p, total); + } +} From 5bc1c52ae6e83bc65c506d682bf732507b3e0f61 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 20 Jan 2020 17:34:29 -0800 Subject: [PATCH 139/179] update documentation --- doc/mimalloc-doc.h | 49 +++- docs/annotated.html | 1 + docs/annotated_dup.js | 3 +- docs/classes.html | 8 +- docs/group__cpp.html | 396 +++++++++++++++++++++++++++++++ docs/group__cpp.js | 11 + docs/group__posix.html | 169 ------------- docs/group__posix.js | 5 - docs/mimalloc-doc_8h_source.html | 15 +- docs/modules.html | 1 + docs/modules.js | 3 +- docs/navtreeindex0.js | 22 +- docs/search/all_3.js | 3 +- docs/search/all_6.js | 13 +- docs/search/classes_0.js | 3 +- docs/search/functions_0.js | 12 +- 
docs/search/groups_2.js | 2 +- docs/search/groups_3.js | 3 +- docs/search/groups_4.js | 3 +- docs/search/groups_5.js | 2 +- docs/search/groups_6.js | 2 +- docs/search/groups_7.js | 2 +- docs/search/groups_8.html | 30 +++ docs/search/groups_8.js | 4 + docs/search/searchdata.js | 2 +- docs/using.html | 2 +- test/main-override.cpp | 8 +- 27 files changed, 547 insertions(+), 227 deletions(-) create mode 100644 docs/group__cpp.html create mode 100644 docs/group__cpp.js create mode 100644 docs/search/groups_8.html create mode 100644 docs/search/groups_8.js diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index ca744e4c..3f24a623 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -74,6 +74,8 @@ Further information: - \ref typed - \ref analysis - \ref options +- \ref posix +- \ref cpp */ @@ -622,7 +624,10 @@ void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, siz /// \defgroup typed Typed Macros /// -/// Typed allocation macros +/// Typed allocation macros. For example: +/// ``` +/// int* p = mi_malloc_tp(int) +/// ``` /// /// \{ @@ -805,21 +810,51 @@ void mi_free_size(void* p, size_t size); void mi_free_size_aligned(void* p, size_t size, size_t alignment); void mi_free_aligned(void* p, size_t alignment); -/// raise `std::bad_alloc` exception on failure. +/// \} + +/// \defgroup cpp C++ wrappers +/// +/// `mi_` prefixed implementations of various allocation functions +/// that use C++ semantics on out-of-memory, generally calling +/// `std::get_new_handler` and raising a `std::bad_alloc` exception on failure. +/// +/// Note: use the `mimalloc-new-delete.h` header to override the \a new +/// and \a delete operators globally. The wrappers here are mostly +/// for convience for library writers that need to interface with +/// mimalloc from C++. +/// +/// \{ + +/// like mi_malloc(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. void* mi_new(std::size_t n) noexcept(false); -/// raise `std::bad_alloc` exception on failure or overflow. +/// like mi_mallocn(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. void* mi_new_n(size_t count, size_t size) noexcept(false); -/// raise `std::bad_alloc` exception on failure. +/// like mi_malloc_aligned(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false); -/// return `NULL` on failure. +/// like `mi_malloc`, but when out of memory, use `std::get_new_handler` but return \a NULL on failure. void* mi_new_nothrow(size_t n); -`` -/// return `NULL` on failure. + +/// like `mi_malloc_aligned`, but when out of memory, use `std::get_new_handler` but return \a NULL on failure. void* mi_new_aligned_nothrow(size_t n, size_t alignment); +/// like mi_realloc(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. +void* mi_new_realloc(void* p, size_t newsize); + +/// like mi_reallocn(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure. +void* mi_new_reallocn(void* p, size_t newcount, size_t size); + +/// \a std::allocator implementation for mimalloc for use in STL containers. +/// For example: +/// ``` +/// std::vector > vec; +/// vec.push_back(1); +/// vec.pop_back(); +/// ``` +template struct mi_stl_allocator { } + /// \} /*! 
\page build Building diff --git a/docs/annotated.html b/docs/annotated.html index 4d2a8bcc..5120b803 100644 --- a/docs/annotated.html +++ b/docs/annotated.html @@ -105,6 +105,7 @@ $(document).ready(function(){initNavTree('annotated.html','');});
Here are the data structures with brief descriptions:
+
 Cmi_heap_area_tAn area of heap space contains blocks of a single size
 Cmi_stl_allocatorstd::allocator implementation for mimalloc for use in STL containers
diff --git a/docs/annotated_dup.js b/docs/annotated_dup.js index 6ed68bc3..67229123 100644 --- a/docs/annotated_dup.js +++ b/docs/annotated_dup.js @@ -1,4 +1,5 @@ var annotated_dup = [ - [ "mi_heap_area_t", "group__analysis.html#structmi__heap__area__t", "group__analysis_structmi__heap__area__t" ] + [ "mi_heap_area_t", "group__analysis.html#structmi__heap__area__t", "group__analysis_structmi__heap__area__t" ], + [ "mi_stl_allocator", "group__cpp.html#structmi__stl__allocator", null ] ]; \ No newline at end of file diff --git a/docs/classes.html b/docs/classes.html index e5ea3ea8..de960fb6 100644 --- a/docs/classes.html +++ b/docs/classes.html @@ -105,10 +105,10 @@ $(document).ready(function(){initNavTree('classes.html','');}); - - - + + + +
  m  
-
mi_heap_area_t   
mi_stl_allocator   
mi_heap_area_t   
diff --git a/docs/group__cpp.html b/docs/group__cpp.html new file mode 100644 index 00000000..caf758a8 --- /dev/null +++ b/docs/group__cpp.html @@ -0,0 +1,396 @@ + + + + + + + +mi-malloc: C++ wrappers + + + + + + + + + + + + + + + + +
+ (Doxygen page header, navigation tree, and search-box markup for the generated mi-malloc 1.4 documentation page)
C++ wrappers
+
+
+ +

mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure. +More...


+Data Structures

struct  mi_stl_allocator< T >
 std::allocator implementation for mimalloc for use in STL containers. More...
 

+Functions

void * mi_new (std::size_t n) noexcept(false)
 like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_n (size_t count, size_t size) noexcept(false)
 like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false)
 like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_nothrow (size_t n)
 like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure. More...
 
void * mi_new_aligned_nothrow (size_t n, size_t alignment)
 like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure. More...
 
void * mi_new_realloc (void *p, size_t newsize)
 like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_reallocn (void *p, size_t newcount, size_t size)
 like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
+

Detailed Description

+

mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure.

+

Note: use the mimalloc-new-delete.h header to override the new and delete operators globally. The wrappers here are mostly a convenience for library writers who need to interface with mimalloc from C++.
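For a whole program, the simplest route is the override header mentioned above; a minimal sketch, assuming it is included in exactly one translation unit of the final binary (the file contents below are illustrative only):

#include <mimalloc-new-delete.h>   // routes the global operator new/delete through mimalloc in this translation unit
#include <vector>

int main() {
  std::vector<int> v;
  v.push_back(42);                 // this allocation is now served by mimalloc
  return 0;
}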

+

Data Structure Documentation

+ +

◆ mi_stl_allocator
template<class T>
struct mi_stl_allocator< T >

+ +

std::allocator implementation for mimalloc for use in STL containers.

+

For example:

std::vector<int, mi_stl_allocator<int> > vec;
vec.push_back(1);
vec.pop_back();
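Containers that rebind the allocator to an internal node type, such as std::map, should work the same way once the rebind and construct/destroy members added in this patch are present; a minimal sketch, assuming <map> and mimalloc.h are included (the key and value types are illustrative only):

std::map<int, double, std::less<int>,
         mi_stl_allocator<std::pair<const int, double> > > m;
m[1] = 42.0;   // node allocations go through mi_new_n/mi_free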
+
+
+

Function Documentation

+ +

◆ mi_new()
void* mi_new (std::size_t n) noexcept
like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.
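The difference from mi_malloc() only shows up on failure; a minimal usage sketch (assumes <new> and mimalloc.h are included):

try {
  void* p = mi_new(100 * sizeof(int));   // throws std::bad_alloc if the new handler cannot recover
  mi_free(p);
}
catch (const std::bad_alloc&) {
  // genuinely out of memory
}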

+ +
+
+ +

◆ mi_new_aligned()
void* mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept
like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_aligned_nothrow()
void* mi_new_aligned_nothrow (size_t n, size_t alignment)
like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure.

+ +
+
+ +

◆ mi_new_n()
void* mi_new_n (size_t count, size_t size) noexcept
like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_nothrow()
void* mi_new_nothrow (size_t n)
like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.

+ +
+
+ +

◆ mi_new_realloc()
void* mi_new_realloc (void * p, size_t newsize)
like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.
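A typical use is growing a buffer with C++ failure semantics instead of checking for NULL; a minimal sketch (the sizes are illustrative only):

char* buf = static_cast<char*>(mi_new(128));
buf = static_cast<char*>(mi_new_realloc(buf, 4096));   // throws std::bad_alloc instead of returning NULL
mi_free(buf);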

+ +
+
+ +

◆ mi_new_reallocn()
void* mi_new_reallocn (void * p, size_t newcount, size_t size)
like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+
+
+ + + + diff --git a/docs/group__cpp.js b/docs/group__cpp.js new file mode 100644 index 00000000..20706646 --- /dev/null +++ b/docs/group__cpp.js @@ -0,0 +1,11 @@ +var group__cpp = +[ + [ "mi_stl_allocator", "group__cpp.html#structmi__stl__allocator", null ], + [ "mi_new", "group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], + [ "mi_new_aligned", "group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], + [ "mi_new_aligned_nothrow", "group__cpp.html#gab5e29558926d934c3f1cae8c815f942c", null ], + [ "mi_new_n", "group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], + [ "mi_new_nothrow", "group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], + [ "mi_new_realloc", "group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e", null ], + [ "mi_new_reallocn", "group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907", null ] +]; \ No newline at end of file diff --git a/docs/group__posix.html b/docs/group__posix.html index eaa4a10f..1aea8dc8 100644 --- a/docs/group__posix.html +++ b/docs/group__posix.html @@ -137,21 +137,6 @@ Functions   void mi_free_aligned (void *p, size_t alignment)   -void * mi_new (std::size_t n) noexcept(false) - raise std::bad_alloc exception on failure. More...
-  -void * mi_new_n (size_t count, size_t size) noexcept(false) - raise std::bad_alloc exception on failure or overflow. More...
-  -void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false) - raise std::bad_alloc exception on failure. More...
-  -void * mi_new_nothrow (size_t n) - return NULL on failure. More...
-  -void * mi_new_aligned_nothrow (size_t n, size_t alignment) - return NULL on failure. More...

Detailed Description

mi_ prefixed implementations of various Posix, Unix, and C++ allocation functions.
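For example, the memalign-style entry points follow the contracts of their standard counterparts; a minimal sketch (alignment and size are illustrative only):

void* p = NULL;
if (mi_posix_memalign(&p, 64, 4096) == 0) {   // same return convention as posix_memalign
  mi_free(p);
}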

@@ -391,160 +376,6 @@ Functions
-
-
- -

◆ mi_new()
void* mi_new (std::size_t n) noexcept
raise std::bad_alloc exception on failure.
◆ mi_new_aligned()
void* mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept
raise std::bad_alloc exception on failure.
◆ mi_new_aligned_nothrow()
void* mi_new_aligned_nothrow (size_t n, size_t alignment)
return NULL on failure.
◆ mi_new_n()
void* mi_new_n (size_t count, size_t size) noexcept
raise std::bad_alloc exception on failure or overflow.
◆ mi_new_nothrow()
void* mi_new_nothrow (size_t n)
return NULL on failure.

-
diff --git a/docs/group__posix.js b/docs/group__posix.js index 0f2b895d..e43453d9 100644 --- a/docs/group__posix.js +++ b/docs/group__posix.js @@ -9,11 +9,6 @@ var group__posix = [ "mi_malloc_size", "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de", null ], [ "mi_malloc_usable_size", "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17", null ], [ "mi_memalign", "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e", null ], - [ "mi_new", "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], - [ "mi_new_aligned", "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], - [ "mi_new_aligned_nothrow", "group__posix.html#gab5e29558926d934c3f1cae8c815f942c", null ], - [ "mi_new_n", "group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], - [ "mi_new_nothrow", "group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], [ "mi_posix_memalign", "group__posix.html#gacff84f226ba9feb2031b8992e5579447", null ], [ "mi_pvalloc", "group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e", null ], [ "mi_reallocarray", "group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088", null ], diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index 12d0f799..f70ae81f 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -102,20 +102,22 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
mimalloc-doc.h
-
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
381 typedef void (mi_error_fun)(int err, void* arg);
382 
398 void mi_register_error(mi_error_fun* errfun, void* arg);
399 
404 bool mi_is_in_heap_region(const void* p);
405 
406 
419 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
420 
433 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
434 
435 
440 bool mi_is_redirected();
441 
442 
444 
445 // ------------------------------------------------------
446 // Aligned allocation
447 // ------------------------------------------------------
448 
454 
467 void* mi_malloc_aligned(size_t size, size_t alignment);
468 void* mi_zalloc_aligned(size_t size, size_t alignment);
469 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
470 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
471 
482 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
483 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
484 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
485 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
486 
488 
494 
499 struct mi_heap_s;
500 
505 typedef struct mi_heap_s mi_heap_t;
506 
509 
517 void mi_heap_delete(mi_heap_t* heap);
518 
526 void mi_heap_destroy(mi_heap_t* heap);
527 
532 
536 
543 
545 void mi_heap_collect(mi_heap_t* heap, bool force);
546 
549 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
550 
554 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
555 
558 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
559 
562 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
563 
566 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
567 
570 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
571 
574 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
575 
578 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
579 
580 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
581 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
582 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
583 
584 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
585 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
586 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
589 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
591 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
592 
594 
595 
604 
605 void* mi_rezalloc(void* p, size_t newsize);
606 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
607 
608 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
609 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
610 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
611 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
612 
613 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
614 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
615 
616 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
617 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
618 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
619 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
620 
622 
628 
640 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
641 
643 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
644 
646 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
647 
649 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
650 
652 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
653 
655 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
656 
658 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
659 
661 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
662 
664 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
665 
667 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
668 
670 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
671 
673 
679 
686 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
687 
696 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
697 
705 bool mi_check_owned(const void* p);
706 
709 typedef struct mi_heap_area_s {
710  void* blocks;
711  size_t reserved;
712  size_t committed;
713  size_t used;
714  size_t block_size;
716 
724 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
725 
737 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
738 
740 
746 
748 typedef enum mi_option_e {
749  // stable options
753  // the following options are experimental
767 } mi_option_t;
768 
769 
770 bool mi_option_enabled(mi_option_t option);
771 void mi_option_enable(mi_option_t option, bool enable);
772 void mi_option_enable_default(mi_option_t option, bool enable);
773 
774 long mi_option_get(mi_option_t option);
775 void mi_option_set(mi_option_t option, long value);
776 void mi_option_set_default(mi_option_t option, long value);
777 
778 
780 
787 
788 void* mi_recalloc(void* p, size_t count, size_t size);
789 size_t mi_malloc_size(const void* p);
790 size_t mi_malloc_usable_size(const void *p);
791 
793 void mi_cfree(void* p);
794 
795 int mi_posix_memalign(void** p, size_t alignment, size_t size);
796 int mi__posix_memalign(void** p, size_t alignment, size_t size);
797 void* mi_memalign(size_t alignment, size_t size);
798 void* mi_valloc(size_t size);
799 
800 void* mi_pvalloc(size_t size);
801 void* mi_aligned_alloc(size_t alignment, size_t size);
802 void* mi_reallocarray(void* p, size_t count, size_t size);
803 
804 void mi_free_size(void* p, size_t size);
805 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
806 void mi_free_aligned(void* p, size_t alignment);
807 
809 void* mi_new(std::size_t n) noexcept(false);
810 
812 void* mi_new_n(size_t count, size_t size) noexcept(false);
813 
815 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
816 
818 void* mi_new_nothrow(size_t n);
819 
821 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
822 
824 
void mi_option_enable_default(mi_option_t option, bool enable)
+
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
381 typedef void (mi_error_fun)(int err, void* arg);
382 
398 void mi_register_error(mi_error_fun* errfun, void* arg);
399 
404 bool mi_is_in_heap_region(const void* p);
405 
406 
419 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
420 
433 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
434 
435 
440 bool mi_is_redirected();
441 
442 
444 
445 // ------------------------------------------------------
446 // Aligned allocation
447 // ------------------------------------------------------
448 
454 
467 void* mi_malloc_aligned(size_t size, size_t alignment);
468 void* mi_zalloc_aligned(size_t size, size_t alignment);
469 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
470 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
471 
482 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
483 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
484 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
485 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
486 
488 
494 
499 struct mi_heap_s;
500 
505 typedef struct mi_heap_s mi_heap_t;
506 
509 
517 void mi_heap_delete(mi_heap_t* heap);
518 
526 void mi_heap_destroy(mi_heap_t* heap);
527 
532 
536 
543 
545 void mi_heap_collect(mi_heap_t* heap, bool force);
546 
549 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
550 
554 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
555 
558 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
559 
562 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
563 
566 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
567 
570 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
571 
574 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
575 
578 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
579 
580 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
581 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
582 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
583 
584 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
585 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
586 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
589 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
591 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
592 
594 
595 
604 
605 void* mi_rezalloc(void* p, size_t newsize);
606 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
607 
608 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
609 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
610 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
611 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
612 
613 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
614 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
615 
616 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
617 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
618 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
619 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
620 
622 
628 
640 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
641 
643 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
644 
646 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
647 
649 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
650 
652 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
653 
655 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
656 
658 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
659 
661 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
662 
664 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
665 
667 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
668 
670 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
671 
673 
679 
686 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
687 
696 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
697 
705 bool mi_check_owned(const void* p);
706 
709 typedef struct mi_heap_area_s {
710  void* blocks;
711  size_t reserved;
712  size_t committed;
713  size_t used;
714  size_t block_size;
716 
724 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
725 
737 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
738 
740 
746 
748 typedef enum mi_option_e {
749  // stable options
753  // the following options are experimental
767 } mi_option_t;
768 
769 
770 bool mi_option_enabled(mi_option_t option);
771 void mi_option_enable(mi_option_t option, bool enable);
772 void mi_option_enable_default(mi_option_t option, bool enable);
773 
774 long mi_option_get(mi_option_t option);
775 void mi_option_set(mi_option_t option, long value);
776 void mi_option_set_default(mi_option_t option, long value);
777 
778 
780 
787 
788 void* mi_recalloc(void* p, size_t count, size_t size);
789 size_t mi_malloc_size(const void* p);
790 size_t mi_malloc_usable_size(const void *p);
791 
793 void mi_cfree(void* p);
794 
795 int mi_posix_memalign(void** p, size_t alignment, size_t size);
796 int mi__posix_memalign(void** p, size_t alignment, size_t size);
797 void* mi_memalign(size_t alignment, size_t size);
798 void* mi_valloc(size_t size);
799 
800 void* mi_pvalloc(size_t size);
801 void* mi_aligned_alloc(size_t alignment, size_t size);
802 void* mi_reallocarray(void* p, size_t count, size_t size);
803 
804 void mi_free_size(void* p, size_t size);
805 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
806 void mi_free_aligned(void* p, size_t alignment);
807 
809 
822 
824 void* mi_new(std::size_t n) noexcept(false);
825 
827 void* mi_new_n(size_t count, size_t size) noexcept(false);
828 
830 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
831 
833 void* mi_new_nothrow(size_t n);
834 
836 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
837 
839 void* mi_new_realloc(void* p, size_t newsize);
840 
842 void* mi_new_reallocn(void* p, size_t newcount, size_t size);
843 
851 template<class T> struct mi_stl_allocator { }
852 
854 
void mi_option_enable_default(mi_option_t option, bool enable)
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
+
void * mi_new_nothrow(size_t n)
like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
void * mi_recalloc_aligned_at(void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
void mi_stats_reset(void)
Reset statistics.
void * mi_heap_realloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
void * mi_new_realloc(void *p, size_t newsize)
like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void * mi_recalloc(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes, with extra memory initialized to zero.
void * mi_mallocn(size_t count, size_t size)
Allocate count elements of size bytes.
size_t mi_malloc_size(const void *p)
int mi_posix_memalign(void **p, size_t alignment, size_t size)
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
+
void * mi_new_n(size_t count, size_t size) noexcept(false)
like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void mi_option_set_default(mi_option_t option, long value)
-
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
raise std::bad_alloc exception on failure.
void() mi_error_fun(int err, void *arg)
Type of error callback functions.
Definition: mimalloc-doc.h:381
void * mi_rezalloc(void *p, size_t newsize)
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:754
@@ -127,7 +129,6 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
Definition: mimalloc-doc.h:766
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:710
-
void * mi_new_n(size_t count, size_t size) noexcept(false)
raise std::bad_alloc exception on failure or overflow.
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
@@ -145,7 +146,6 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:758
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
-
void * mi_new(std::size_t n) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:714
@@ -154,6 +154,7 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:342
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
void mi_option_enable(mi_option_t option, bool enable)
+
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc e...
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:757
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
@@ -175,9 +176,8 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
Print error messages to stderr.
Definition: mimalloc-doc.h:751
Experimental.
Definition: mimalloc-doc.h:760
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_memalign(size_t alignment, size_t size)
-
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
return NULL on failure.
-
void * mi_new_nothrow(size_t n)
return NULL on failure.
void * mi_rezalloc_aligned(void *p, size_t newsize, size_t alignment)
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
@@ -207,11 +207,13 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
+
void * mi_new_reallocn(void *p, size_t newcount, size_t size)
like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc excepti...
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:756
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
+
std::allocator implementation for mimalloc for use in STL containers.
Definition: mimalloc-doc.h:851
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
void * mi_heap_realloc(mi_heap_t *heap, void *p, size_t newsize)
size_t mi_malloc_usable_size(const void *p)
@@ -227,6 +229,7 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
long mi_option_get(mi_option_t option)
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
+
void * mi_new(std::size_t n) noexcept(false)
like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception...
Delay in milli-seconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:761
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
diff --git a/docs/modules.html b/docs/modules.html index 0bc6036d..91bf17e8 100644 --- a/docs/modules.html +++ b/docs/modules.html @@ -113,6 +113,7 @@ $(document).ready(function(){initNavTree('modules.html','');});  Heap IntrospectionInspect the heap at runtime  Runtime OptionsSet runtime behavior  Posixmi_ prefixed implementations of various Posix, Unix, and C++ allocation functions + C++ wrappersmi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure
diff --git a/docs/modules.js b/docs/modules.js index 47e99b42..b2c2a224 100644 --- a/docs/modules.js +++ b/docs/modules.js @@ -8,5 +8,6 @@ var modules = [ "Typed Macros", "group__typed.html", "group__typed" ], [ "Heap Introspection", "group__analysis.html", "group__analysis" ], [ "Runtime Options", "group__options.html", "group__options" ], - [ "Posix", "group__posix.html", "group__posix" ] + [ "Posix", "group__posix.html", "group__posix" ], + [ "C++ wrappers", "group__cpp.html", "group__cpp" ] ]; \ No newline at end of file diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index e2667728..047d6dbc 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -28,6 +28,15 @@ var NAVTREEINDEX0 = "group__analysis.html#gaa862aa8ed8d57d84cae41fc1022d71af":[5,6,4], "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1], "group__analysis.html#structmi__heap__area__t":[5,6,0], +"group__cpp.html":[5,9], +"group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907":[5,9,7], +"group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e":[5,9,6], +"group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,9,1], +"group__cpp.html#gab5e29558926d934c3f1cae8c815f942c":[5,9,3], +"group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,9,4], +"group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,9,5], +"group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,9,2], +"group__cpp.html#structmi__stl__allocator":[5,9,0], "group__extended.html":[5,1], "group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,21], "group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,18], @@ -123,20 +132,15 @@ var NAVTREEINDEX0 = "group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9":[5,8,3], "group__posix.html#ga1326d2e4388630b5f81ca7206318b8e5":[5,8,1], "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de":[5,8,6], -"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,16], +"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,11], "group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7":[5,8,2], "group__posix.html#ga72e9d7ffb5fe94d69bc722c8506e27bc":[5,8,5], -"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,17], +"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,12], "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e":[5,8,8], -"group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,8,9], -"group__posix.html#gab5e29558926d934c3f1cae8c815f942c":[5,8,11], -"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,14], +"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,9], "group__posix.html#gad5a69c8fea96aa2b7a7c818c2130090a":[5,8,0], "group__posix.html#gae01389eedab8d67341ff52e2aad80ebb":[5,8,4], -"group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,8,12], -"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,13], -"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,15], -"group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,8,10], +"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,10], "group__typed.html":[5,5], "group__typed.html#ga0619a62c5fd886f1016030abe91f0557":[5,5,7], "group__typed.html#ga1158b49a55dfa81f58a4426a7578f523":[5,5,9], diff --git a/docs/search/all_3.js b/docs/search/all_3.js index af76e9c8..2e08411f 100644 --- a/docs/search/all_3.js +++ b/docs/search/all_3.js @@ -1,4 +1,5 @@ var searchData= [ - ['committed',['committed',['../group__analysis.html#ab47526df656d8837ec3e97f11b83f835',1,'mi_heap_area_t']]] + 
['committed',['committed',['../group__analysis.html#ab47526df656d8837ec3e97f11b83f835',1,'mi_heap_area_t']]], + ['c_2b_2b_20wrappers',['C++ wrappers',['../group__cpp.html',1,'']]] ]; diff --git a/docs/search/all_6.js b/docs/search/all_6.js index 7af11c0f..c757cbbf 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -73,11 +73,13 @@ var searchData= ['mi_5fmallocn',['mi_mallocn',['../group__malloc.html#ga0b05e2bf0f73e7401ae08597ff782ac6',1,'mimalloc-doc.h']]], ['mi_5fmallocn_5ftp',['mi_mallocn_tp',['../group__typed.html#gae5cb6e0fafc9f23169c5622e077afe8b',1,'mimalloc-doc.h']]], ['mi_5fmemalign',['mi_memalign',['../group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e',1,'mimalloc-doc.h']]], - ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew',['mi_new',['../group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned',['mi_new_aligned',['../group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__cpp.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], + ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -126,6 +128,7 @@ var searchData= ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], + ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], 
['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], diff --git a/docs/search/classes_0.js b/docs/search/classes_0.js index 4c5482b9..0010dd97 100644 --- a/docs/search/classes_0.js +++ b/docs/search/classes_0.js @@ -1,4 +1,5 @@ var searchData= [ - ['mi_5fheap_5farea_5ft',['mi_heap_area_t',['../group__analysis.html#structmi__heap__area__t',1,'']]] + ['mi_5fheap_5farea_5ft',['mi_heap_area_t',['../group__analysis.html#structmi__heap__area__t',1,'']]], + ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]] ]; diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index 098041bb..6271797a 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -59,11 +59,13 @@ var searchData= ['mi_5fmalloc_5fusable_5fsize',['mi_malloc_usable_size',['../group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17',1,'mimalloc-doc.h']]], ['mi_5fmallocn',['mi_mallocn',['../group__malloc.html#ga0b05e2bf0f73e7401ae08597ff782ac6',1,'mimalloc-doc.h']]], ['mi_5fmemalign',['mi_memalign',['../group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e',1,'mimalloc-doc.h']]], - ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fn',['mi_new_n',['../group__posix.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew',['mi_new',['../group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned',['mi_new_aligned',['../group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__cpp.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], + ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], ['mi_5foption_5fenabled',['mi_option_enabled',['../group__options.html#gacebe3f6d91b4a50b54eb84e2a1da1b30',1,'mimalloc-doc.h']]], diff --git a/docs/search/groups_2.js b/docs/search/groups_2.js index 68c73dbe..29185761 100644 --- a/docs/search/groups_2.js +++ b/docs/search/groups_2.js @@ -1,4 +1,4 @@ var searchData= [ - ['extended_20functions',['Extended Functions',['../group__extended.html',1,'']]] + ['c_2b_2b_20wrappers',['C++ wrappers',['../group__cpp.html',1,'']]] ]; diff --git a/docs/search/groups_3.js 
b/docs/search/groups_3.js index e7e40934..68c73dbe 100644 --- a/docs/search/groups_3.js +++ b/docs/search/groups_3.js @@ -1,5 +1,4 @@ var searchData= [ - ['heap_20introspection',['Heap Introspection',['../group__analysis.html',1,'']]], - ['heap_20allocation',['Heap Allocation',['../group__heap.html',1,'']]] + ['extended_20functions',['Extended Functions',['../group__extended.html',1,'']]] ]; diff --git a/docs/search/groups_4.js b/docs/search/groups_4.js index 4f005682..e7e40934 100644 --- a/docs/search/groups_4.js +++ b/docs/search/groups_4.js @@ -1,4 +1,5 @@ var searchData= [ - ['posix',['Posix',['../group__posix.html',1,'']]] + ['heap_20introspection',['Heap Introspection',['../group__analysis.html',1,'']]], + ['heap_20allocation',['Heap Allocation',['../group__heap.html',1,'']]] ]; diff --git a/docs/search/groups_5.js b/docs/search/groups_5.js index 2533cb94..4f005682 100644 --- a/docs/search/groups_5.js +++ b/docs/search/groups_5.js @@ -1,4 +1,4 @@ var searchData= [ - ['runtime_20options',['Runtime Options',['../group__options.html',1,'']]] + ['posix',['Posix',['../group__posix.html',1,'']]] ]; diff --git a/docs/search/groups_6.js b/docs/search/groups_6.js index 647887f5..2533cb94 100644 --- a/docs/search/groups_6.js +++ b/docs/search/groups_6.js @@ -1,4 +1,4 @@ var searchData= [ - ['typed_20macros',['Typed Macros',['../group__typed.html',1,'']]] + ['runtime_20options',['Runtime Options',['../group__options.html',1,'']]] ]; diff --git a/docs/search/groups_7.js b/docs/search/groups_7.js index 2b9b4cea..647887f5 100644 --- a/docs/search/groups_7.js +++ b/docs/search/groups_7.js @@ -1,4 +1,4 @@ var searchData= [ - ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] + ['typed_20macros',['Typed Macros',['../group__typed.html',1,'']]] ]; diff --git a/docs/search/groups_8.html b/docs/search/groups_8.html new file mode 100644 index 00000000..81ac9508 --- /dev/null +++ b/docs/search/groups_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+ Loading...
+ Searching...
+ No Matches
+ + diff --git a/docs/search/groups_8.js b/docs/search/groups_8.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/groups_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/searchdata.js b/docs/search/searchdata.js index 919719e9..dd31068e 100644 --- a/docs/search/searchdata.js +++ b/docs/search/searchdata.js @@ -7,7 +7,7 @@ var indexSectionsWithContent = 4: "m", 5: "m", 6: "_m", - 7: "abehprtz", + 7: "abcehprtz", 8: "beopu" }; diff --git a/docs/using.html b/docs/using.html index eae37a5e..c5dc12e7 100644 --- a/docs/using.html +++ b/docs/using.html @@ -106,7 +106,7 @@ $(document).ready(function(){initNavTree('using.html','');});

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test/CMakeLists.txt for an example.

C++

For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.
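As an illustration, a minimal source file that enables the override might look as follows (the file name main.cpp is just an example; only the include line matters):

// main.cpp -- include the override header in exactly one source file of the project
#include <mimalloc-new-delete.h>

int main() {
  int* p = new int(42);   // the global new/delete now allocate through mimalloc
  delete p;
  return 0;
}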

-

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

+

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_SHOW_STATS=1) (in the debug version):

> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats: peak total freed unit
normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
normal 6: 48 b 48 b 48 b 48 b ok
normal 17: 960 b 960 b 960 b 320 b ok
heap stats: peak total freed unit
normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
huge: 0 b 0 b 0 b 1 b ok
total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
malloc requested: 32.8 mb
committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
reset: 0 b 0 b 0 b 1 b ok
segments: 1 1 1
-abandoned: 0
pages: 6 6 6
-abandoned: 0
mmaps: 3
mmap fast: 0
mmap slow: 1
threads: 0
elapsed: 2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb

The above model of using the mi_ prefixed API is not always possible in existing programs that already use the standard malloc interface; another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.
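For example, on Linux a dynamic override is typically done by preloading the mimalloc shared library; the exact path depends on where libmimalloc.so is installed:

> env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram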

See Overriding Malloc for more info.

diff --git a/test/main-override.cpp b/test/main-override.cpp index d082ade3..fcf3970f 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -71,16 +71,16 @@ static Static s = Static(); bool test_stl_allocator1() { - std::vector> vec; + std::vector > vec; vec.push_back(1); vec.pop_back(); return vec.size() == 0; } -bool test_stl_allocator2() { - struct some_struct { int i; int j; double z; }; +struct some_struct { int i; int j; double z; }; - std::vector> vec; +bool test_stl_allocator2() { + std::vector > vec; vec.push_back(some_struct()); vec.pop_back(); return vec.size() == 0; From af2cfe255a9e4e3eb27f8ad4b13a64ebc441fde6 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 15:05:02 -0800 Subject: [PATCH 140/179] add updated benchmarks --- readme.md | 208 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 117 insertions(+), 91 deletions(-) diff --git a/readme.md b/readme.md index b6258cfc..c5c71ac4 100644 --- a/readme.md +++ b/readme.md @@ -313,68 +313,71 @@ under your control or otherwise mixing of pointers from different heaps may occu # Performance +Last update: 2020-01-20 + We tested _mimalloc_ against many other top allocators over a wide range of benchmarks, ranging from various real world programs to synthetic benchmarks that see how the allocator behaves under more -extreme circumstances. +extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading +allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. A nice property is that it +does consistently well over the wide range of benchmarks. -In our benchmarks, _mimalloc_ always outperforms all other leading -allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and usually uses less -memory (up to 25% more in the worst case). A nice property is that it -does *consistently* well over the wide range of benchmarks. - -Allocators are interesting as there exists no algorithm that is generally +General memory allocators are interesting as there exists no algorithm that is optimal -- for a given allocator one can usually construct a workload where it does not do so well. The goal is thus to find an allocation strategy that performs well over a wide range of benchmarks without -suffering from underperformance in less common situations (which is what -the second half of our benchmark set tests for). +suffering from (too much) underperformance in less common situations. -We show here only the results on an AMD EPYC system (Apr 2019) -- for -specific details and further benchmarks we refer to the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action). +As always, interpret these results with care since some benchmarks test synthetic +or uncommon situations that may never apply to your workloads. For example, most +allocators do not do well on `xmalloc-testN` but that includes the best +industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of +the world's largest systems (like Chrome or FreeBSD). -The benchmark suite is scripted and available separately +We show here only an overview -- for +more specific details and further benchmarks we refer to the +[technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action). +The benchmark suite is automated and available separately as [mimalloc-bench](https://github.com/daanx/mimalloc-bench). 
-## Benchmark Results +## Benchmark Results on 36-core Intel -Testing on a big Amazon EC2 instance ([r5a.4xlarge](https://aws.amazon.com/ec2/instance-types/)) -consisting of a 16-core AMD EPYC 7000 at 2.5GHz -with 128GB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.3.0. -The measured allocators are _mimalloc_ (mi), -Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc) used in Chrome, -[_jemalloc_](https://github.com/jemalloc/jemalloc) (je) by Jason Evans used in Firefox and FreeBSD, -[_snmalloc_](https://github.com/microsoft/snmalloc) (sn) by Liétar et al. \[8], [_rpmalloc_](https://github.com/rampantpixels/rpmalloc) (rp) by Mattias Jansson at Rampant Pixels, -[_Hoard_](https://github.com/emeryberger/Hoard) by Emery Berger \[1], -the system allocator (glibc) (based on _PtMalloc2_), and the Intel thread -building blocks [allocator](https://github.com/intel/tbb) (tbb). +Testing on a big Amazon EC2 compute instance +([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized)) +consisting of a 72 processor Intel Xeon at 3GHz +with 144GiB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.4.0. +The measured allocators are _mimalloc_ (mi, tag:v1.4.0, page reset enabled) +and its secure build as _smi_, +Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome, +Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD, +the Intel thread building blocks [allocator](https://github.com/intel/tbb) (tbb, tag:2020), +[rpmalloc](https://github.com/mjansson/rpmalloc) (rp,tag:1.4.0) by Mattias Jansson, +the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (tag:3.13) allocator by Emery Berger \[1], +the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e7) allocator by +Bobby Powers _et al_ \[8], +and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_). -![bench-r5a-1](doc/bench-r5a-1.svg) -![bench-r5a-2](doc/bench-r5a-2.svg) +![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svq) +![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svq) -Memory usage: +Any benchmarks ending in `N` run on all processors in parallel. +Results are averaged over 10 runs and reported relative +to mimalloc (where 1.2 means it took 1.2× longer to run). +The legend also contains the _overall relative score_ between the +allocators where 100 points is the maximum if an allocator is fastest on +all benchmarks. -![bench-r5a-rss-1](doc/bench-r5a-rss-1.svg) -![bench-r5a-rss-1](doc/bench-r5a-rss-2.svg) +The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of +continued fraction factorization which uses many small short-lived allocations. +All allocators do well on such common usage, where _mimalloc_ is just a tad +faster than _tcmalloc_ and +_jemalloc_. -(note: the _xmalloc-testN_ memory usage should be disregarded as it -allocates more the faster the program runs). - -In the first five benchmarks we can see _mimalloc_ outperforms the other -allocators moderately, but we also see that all these modern allocators -perform well -- the times of large performance differences in regular -workloads are over :-). -In _cfrac_ and _espresso_, _mimalloc_ is a tad faster than _tcmalloc_ and -_jemalloc_, but a solid 10\% faster than all other allocators on -_espresso_. The _tbb_ allocator does not do so well here and lags more than -20\% behind _mimalloc_. 
The _cfrac_ and _espresso_ programs do not use much -memory (~1.5MB) so it does not matter too much, but still _mimalloc_ uses -about half the resident memory of _tcmalloc_. - -The _leanN_ program is most interesting as a large realistic and -concurrent workload of the [Lean](https://github.com/leanprover/lean) theorem prover -compiling its own standard library, and there is a 8% speedup over _tcmalloc_. This is +The _leanN_ program is interesting as a large realistic and +concurrent workload of the [Lean](https://github.com/leanprover/lean) +theorem prover compiling its own standard library, and there is a 7% +speedup over _tcmalloc_. This is quite significant: if Lean spends 20% of its time in the allocator that means that _mimalloc_ is 1.3× faster than _tcmalloc_ here. (This is surprising as that is not measured in a pure @@ -383,19 +386,23 @@ outsized improvement here because _mimalloc_ has better locality in the allocation which improves performance for the *other* computations in a program as well). -The _redis_ benchmark shows more differences between the allocators where -_mimalloc_ is 14\% faster than _jemalloc_. On this benchmark _tbb_ (and _Hoard_) do -not do well and are over 40\% slower. +The single threaded _redis_ benchmark again show that most allocators do well on such workloads where _tcmalloc_ +did best this time. -The _larson_ server workload allocates and frees objects between -many threads. Larson and Krishnan \[2] observe this -behavior (which they call _bleeding_) in actual server applications, and the -benchmark simulates this. -Here, _mimalloc_ is more than 2.5× faster than _tcmalloc_ and _jemalloc_ -due to the object migration between different threads. This is a difficult -benchmark for other allocators too where _mimalloc_ is still 48% faster than the next -fastest (_snmalloc_). +The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees between threads. They observed this +behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this. +Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads. +The _mstressN_ workload performs many allocations and re-allocations, +and migrates objects between threads (as in _larsonN_). However, it also +creates and destroys the _N_ worker threads a few times keeping some objects +alive beyond the life time of the allocating thread. We observed this +behavior in many larger server applications. + +The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark +by Mattias Jansson is a allocator test originally designed +for _rpmalloc_, and tries to simulate realistic allocation patterns over +multiple threads. Here the differences between allocators become more apparent. The second benchmark set tests specific aspects of the allocators and shows even more extreme differences between them. @@ -404,46 +411,62 @@ The _alloc-test_, by [OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of allocations in various size classes. The test is scaled such that when an allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it -means that it scales linearly. Here, _tcmalloc_, _snmalloc_, and -_Hoard_ seem to scale less well and do more than 10% worse on the -multi-core version. 
Even the best allocators (_tcmalloc_ and _jemalloc_) are -more than 10% slower as _mimalloc_ here. +means that it scales linearly. Here, _tcmalloc_, and +_Hoard_ seem to scale less well and do more than 10% worse on the multi-core version. Even the best industrial +allocators (_tcmalloc_, _jemalloc_, and _tbb_) are more than 10% slower as _mimalloc_ here. The _sh6bench_ and _sh8bench_ benchmarks are developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap. In _sh6bench_ _mimalloc_ does much -better than the others (more than 2× faster than _jemalloc_). +better than the others (more than 1.5× faster than _jemalloc_). We cannot explain this well but believe it is caused in part by the "reverse" free-ing pattern in _sh6bench_. -Again in _sh8bench_ the _mimalloc_ allocator handles object migration -between threads much better and is over 36% faster than the next best -allocator, _snmalloc_. Whereas _tcmalloc_ did well on _sh6bench_, the -addition of object migration caused it to be almost 3 times slower -than before. +The _sh8bench_ is a variation with object migration +between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10× slower than before. -The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, -simulates an asymmetric workload where -some threads only allocate, and others only free. The _snmalloc_ -allocator was especially developed to handle this case well as it -often occurs in concurrent message passing systems (like the [Pony] language -for which _snmalloc_ was initially developed). Here we see that +The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, simulates an asymmetric workload where +some threads only allocate, and others only free -- they observed this pattern in +larger server applications. Here we see that the _mimalloc_ technique of having non-contended sharded thread free -lists pays off as it even outperforms _snmalloc_ here. -Only _jemalloc_ also handles this reasonably well, while the -others underperform by a large margin. +lists pays off as it outperforms others by a very large margin. Only _rpmalloc_ and _tbb_ also scale well on this benchmark. -The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with the Hoard -allocator to test for _passive-false_ sharing of cache lines. With a single thread they all +The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with +the Hoard allocator to test for _passive-false_ sharing of cache lines. +With a single thread they all perform the same, but when running with multiple threads the potential allocator -induced false sharing of the cache lines causes large run-time -differences, where _mimalloc_ is more than 18× faster than _jemalloc_ and -_tcmalloc_! Crundal \[6] describes in detail why the false cache line -sharing occurs in the _tcmalloc_ design, and also discusses how this +induced false sharing of the cache lines can cause large run-time differences. +Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this can be avoided with some small implementation changes. -Only _snmalloc_ and _tbb_ also avoid the -cache line sharing like _mimalloc_. Kukanov and Voss \[7] describe in detail +Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the +cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate +the effects. 
Kukanov and Voss \[7] describe in detail how the design of _tbb_ avoids the false cache line sharing. +## On 24-core AMD Epyc + +For completeness, here are the results on a +[r5a.12xlarge](https://aws.amazon.com/ec2/instance-types/#Memory_Optimized) instance +having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory. +The results are similar to the Intel results but it is interesting to +see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. + +![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svq) +![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svq) + + +## Peak Working Set + +The following figure shows the peak working set (rss) of the allocators +on the benchmarks (on the c5.18xlarge instance). + +![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svq) +![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svq) + +Note that the _xmalloc-testN_ memory usage should be disregarded as it +allocates more the faster the program runs. Similarly, memory usage of +_mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and +speed. Nevertheless, even though _mimalloc_ is fast on these benchmarks we +believe the memory usage is too high and hope to improve. # References @@ -453,14 +476,12 @@ how the design of _tbb_ avoids the false cache line sharing. the Ninth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000. [pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf) - -- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. In ISMM, Vancouver, B.C., Canada, 1998. - [pdf](http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=5F0BFB4F57832AEB6C11BF8257271088?doi=10.1.1.45.1947&rep=rep1&type=pdf) +- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. + In ISMM, Vancouver, B.C., Canada, 1998. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf) - \[3] D. Grunwald, B. Zorn, and R. Henderson. _Improving the cache locality of memory allocation_. In R. Cartwright, editor, - Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. - [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf) + Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf) - \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986. @@ -468,17 +489,22 @@ how the design of _tbb_ avoids the false cache line sharing. In USENIX Annual Technical Conference, Freenix Session. San Diego, CA. Jun. 2000. Available at -- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc._ - 2016. . CS16S1 project at the Australian National University. +- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University. [pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf) - \[7] Alexey Kukanov, and Michael J Voss. _The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._ Intel Technology Journal 11 (4). 
   2007
-- \[8] Paul Liétar, Theodore Butler, Sylvan Clebsch, Sophia Drossopoulou, Juliana Franco, Matthew J Parkinson,
+- \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor.
+  _Mesh: Compacting Memory Management for C/C++_
+  In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333–346.
+
+

 # Contributing

From c2c56e29c7dfa757d49bc7824fa4727657782e8b Mon Sep 17 00:00:00 2001
From: daan
Date: Wed, 22 Jan 2020 15:06:44 -0800
Subject: [PATCH 141/179] add benchmark charts

---
 doc/bench-c5-18xlarge-2020-01-20-a.svg     |  886 +++++++++++++++
 doc/bench-c5-18xlarge-2020-01-20-b.svg     | 1184 ++++++++++++++++++++
 doc/bench-c5-18xlarge-2020-01-20-rss-a.svg |  756 +++++++++++++
 doc/bench-c5-18xlarge-2020-01-20-rss-b.svg | 1027 +++++++++++++++++
 doc/bench-r5a-12xlarge-2020-01-16-a.svg    |  867 ++++++++++++++
 doc/bench-r5a-12xlarge-2020-01-16-b.svg    | 1156 +++++++++++++++++++
 6 files changed, 5876 insertions(+)
 create mode 100644 doc/bench-c5-18xlarge-2020-01-20-a.svg
 create mode 100644 doc/bench-c5-18xlarge-2020-01-20-b.svg
 create mode 100644 doc/bench-c5-18xlarge-2020-01-20-rss-a.svg
 create mode 100644 doc/bench-c5-18xlarge-2020-01-20-rss-b.svg
 create mode 100644 doc/bench-r5a-12xlarge-2020-01-16-a.svg
 create mode 100644 doc/bench-r5a-12xlarge-2020-01-16-b.svg

[The bodies of the six new `doc/bench-*.svg` benchmark charts consist only of SVG chart markup; that markup was stripped in this copy and is omitted here.]

From 6f7d98d6698939f49a6ba1b13fab87068df6435e Mon Sep 17 00:00:00 2001
From: daan
Date: Wed, 22 Jan 2020 15:08:18 -0800
Subject: [PATCH 142/179] fix benchmark chart links

---
 readme.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/readme.md b/readme.md
index c5c71ac4..388e6470 100644
--- a/readme.md
+++ b/readme.md
@@ -358,8 +358,8 @@ the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e
 Bobby Powers _et al_ \[8],
 and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_).

-![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svq)
-![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svq)
+![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svg)
+![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svg)

 Any benchmarks ending in `N` run on all processors in parallel.
 Results are averaged over 10 runs and reported relative
@@ -450,8 +450,8 @@ having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory.
 The results are similar to the Intel results but it is interesting to
 see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks.

-![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svq)
-![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svq)
+![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svg)
+![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svg)

 ## Peak Working Set

@@ -459,8 +459,8 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks
 The following figure shows the peak working set (rss) of the allocators
 on the benchmarks (on the c5.18xlarge instance).

-![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svq)
-![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svq)
+![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svg)
+![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svg)

 Note that the _xmalloc-testN_ memory usage should be disregarded as it
 allocates more the faster the program runs.

From 433598296a7e154436eabd613968d7f1ea7cd18d Mon Sep 17 00:00:00 2001
From: Daan
Date: Wed, 22 Jan 2020 15:21:54 -0800
Subject: [PATCH 143/179] Fix benchmark chart display

---
 readme.md | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/readme.md b/readme.md
index 388e6470..db58df30 100644
--- a/readme.md
+++ b/readme.md
@@ -56,8 +56,8 @@ Enjoy!

 ### Releases

-* 2020-01-XX, `v1.4.0`: stable release 1.4: delayed OS page reset for (much) better performance
-  with page reset enabled, more eager concurrent free, addition of STL allocator.
+* 2020-01-22, `v1.4.0`: stable release 1.4: delayed OS page reset with (much) better performance
+  (when page reset is enabled), more eager concurrent free, addition of STL allocator, fixed potential memory leak.
* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. @@ -208,14 +208,17 @@ or via environment variables. to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible). -- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions + - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at startup and can give quite a performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving - contiguous physical memory can take a long time when memory is fragmented. + contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at + startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments of a thread to not allocate in the huge OS pages; this prevents threads that are short lived @@ -358,8 +361,8 @@ the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e Bobby Powers _et al_ \[8], and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_). -![bench-c5-18xlarge-a](doc/bench-c5-18xlarge-2020-01-20-a.svg) -![bench-c5-18xlarge-b](doc/bench-c5-18xlarge-2020-01-20-b.svg) + + Any benchmarks ending in `N` run on all processors in parallel. Results are averaged over 10 runs and reported relative @@ -450,8 +453,8 @@ having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory. The results are similar to the Intel results but it is interesting to see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks. -![bench-r5a-12xlarge-a](doc/bench-r5a-12xlarge-2020-01-16-a.svg) -![bench-r5a-12xlarge-b](doc/bench-r5a-12xlarge-2020-01-16-b.svg) + + ## Peak Working Set @@ -459,8 +462,8 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks The following figure shows the peak working set (rss) of the allocators on the benchmarks (on the c5.18xlarge instance). -![bench-c5-18xlarge-rss-a](doc/bench-c5-18xlarge-2020-01-20-rss-a.svg) -![bench-c5-18xlarge-rss-b](doc/bench-c5-18xlarge-2020-01-20-rss-b.svg) + + Note that the _xmalloc-testN_ memory usage should be disregarded as it allocates more the faster the program runs. 
Similarly, memory usage of From b7aef989e89aa624bdd9ba487b7df45334568e64 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 22 Jan 2020 15:27:47 -0800 Subject: [PATCH 144/179] Update readme.md --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index db58df30..1dc16c02 100644 --- a/readme.md +++ b/readme.md @@ -350,7 +350,7 @@ Testing on a big Amazon EC2 compute instance ([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized)) consisting of a 72 processor Intel Xeon at 3GHz with 144GiB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.4.0. -The measured allocators are _mimalloc_ (mi, tag:v1.4.0, page reset enabled) +The measured allocators are _mimalloc_ (xmi, tag:v1.4.0, page reset enabled) and its secure build as _smi_, Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome, Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD, From 6a744a8549263696ef8d620006a0de2249e59b46 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 22 Jan 2020 18:16:40 -0800 Subject: [PATCH 145/179] Update readme.md --- readme.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/readme.md b/readme.md index 1dc16c02..baac2a93 100644 --- a/readme.md +++ b/readme.md @@ -10,15 +10,15 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. Initially developed by Daan Leijen for the run-time systems of the -[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. +[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. +Latest release:`v1.4.0` (2020-01-22). It is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` > LD_PRELOAD=/usr/bin/libmimalloc.so myprogram ``` - -Notable aspects of the design include: +It also has an easy way to override the allocator in [Windows](#override_on_windows). Notable aspects of the design include: - __small and consistent__: the library is about 6k LOC using simple and consistent data structures. This makes it very suitable @@ -45,9 +45,10 @@ Notable aspects of the design include: times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes), and has no internal points of contention using only atomic operations. - __fast__: In our benchmarks (see [below](#performance)), - _mimalloc_ always outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), + _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and usually uses less memory (up to 25% more in the worst case). A nice property - is that it does consistently well over a wide range of benchmarks. + is that it does consistently well over a wide range of benchmarks. There is also good huge OS page + support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. @@ -56,8 +57,8 @@ Enjoy! 
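(As a hedged aside, not part of the original readme diff: besides the preload-based override shown above, a program can also call the allocator directly through its `mi_` prefixed API. The buffer sizes below are arbitrary and purely illustrative.)

```
#include <mimalloc.h>
#include <string.h>

int main(void) {
  char* buf = (char*)mi_malloc(64);       // explicit mimalloc allocation
  strcpy(buf, "hello mimalloc");
  buf = (char*)mi_realloc(buf, 128);      // the mi_ variants mirror the standard malloc interface
  mi_free(buf);
  return 0;
}
```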
### Releases -* 2020-01-22, `v1.4.0`: stable release 1.4: delayed OS page reset with (much) better performance - (when page reset is enabled), more eager concurrent free, addition of STL allocator, fixed potential memory leak. +* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, +more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. @@ -189,7 +190,7 @@ malloc requested: 32.8 mb The above model of using the `mi_` prefixed API is not always possible though in existing programs that already use the standard malloc interface, and another option is to override the standard malloc interface -completely and redirect all calls to the _mimalloc_ library instead. +completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options @@ -215,7 +216,7 @@ or via environment variables. real drawbacks and may improve performance by a little. --> - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at - startup and can give quite a performance improvement on long running workloads. Usually it is better to not use + startup and can give quite a (latency) performance improvement on long running workloads. Usually it is better to not use `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). @@ -236,7 +237,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically This is the recommended way to override the standard malloc interface. -### Linux, BSD +### Override on Linux, BSD On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -255,7 +256,7 @@ or run with the debug version to get detailed statistics: > env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` -### MacOS +### Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -270,9 +271,9 @@ the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-i Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). -### Windows +### Override on Windows -Overriding on Windows is robust but requires that you link your program explicitly with +Overriding on Windows is robust but requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). @@ -283,7 +284,7 @@ To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). 
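As a minimal sketch of such a forcing call (illustrative only, not taken from the repository; the surrounding program is hypothetical):

```
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  mi_version();   // any mimalloc API call works; it keeps the linker from dropping the DLL dependency
  // ... the rest of the program keeps using malloc/new as usual; calls are redirected to mimalloc ...
  printf("running with the mimalloc override\n");
  return 0;
}
```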
See the `mimalloc-override-test` project for an example on how to use this. For best performance on Windows with C++, it -is highly recommended to also override the `new`/`delete` operations (by including +is also recommended to also override the `new`/`delete` operations (by including [`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic From 0f14f431c55405b4da992ee3ac54da3726184851 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 18:33:07 -0800 Subject: [PATCH 146/179] bump version to 1.5 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 20 ++++++++++---------- test/CMakeLists.txt | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 0a982bdf..5137be80 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,5 +1,5 @@ set(mi_version_major 1) -set(mi_version_minor 4) +set(mi_version_minor 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 3861ad4f..1c77d462 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 140 // major + 2 digits minor +#define MI_MALLOC_VERSION 150 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -368,9 +368,9 @@ mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_a #endif template struct mi_stl_allocator { - typedef T value_type; + typedef T value_type; typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef value_type& reference; typedef value_type const& const_reference; typedef value_type* pointer; @@ -383,23 +383,23 @@ template struct mi_stl_allocator { mi_stl_allocator select_on_container_copy_construction() const { return *this; } void deallocate(T* p, size_type) { mi_free(p); } - #if (__cplusplus >= 201703L) // C++17 + #if (__cplusplus >= 201703L) // C++17 T* allocate(size_type count) { return static_cast(mi_new_n(count, sizeof(T))); } - T* allocate(size_type count, const void*) { return allocate(count); } - #else + T* allocate(size_type count, const void*) { return allocate(count); } + #else pointer allocate(size_type count, const void* = 0) { return static_cast(mi_new_n(count, sizeof(value_type))); } - #endif - + #endif + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 using propagate_on_container_copy_assignment = std::true_type; using propagate_on_container_move_assignment = std::true_type; using propagate_on_container_swap = std::true_type; using is_always_equal = std::true_type; template void construct(U* p, Args&& ...args) { ::new(p) U(std::forward(args)...); } - template void destroy(U* p) mi_attr_noexcept { p->~U(); } + template void destroy(U* p) mi_attr_noexcept { p->~U(); } #else void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } + void destroy(pointer p) { p->~value_type(); } #endif size_type max_size() const mi_attr_noexcept { return (std::numeric_limits::max() / 
sizeof(value_type)); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4862c0ec..ce077d14 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE) endif() # Import mimalloc (if installed) -find_package(mimalloc 1.4 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +find_package(mimalloc 1.5 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") # overriding with a dynamic library From e3b16fe4efacaa220395bc671622d21c98cc17ec Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 19:44:54 -0800 Subject: [PATCH 147/179] reduce type casts in atomic operations --- include/mimalloc-atomic.h | 85 ++++++++++++++++++++------------------- src/alloc.c | 4 +- src/arena.c | 16 ++++---- src/memory.c | 14 +++---- src/options.c | 12 +++--- src/os.c | 8 ++-- src/page.c | 14 +++---- src/segment.c | 10 ++--- src/stats.c | 22 +++++----- 9 files changed, 93 insertions(+), 92 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 5d140f0c..8577dbc5 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -23,18 +23,16 @@ terms of the MIT license. A copy of the license can be found in the file #include #endif -#define mi_atomic_cast(tp,x) (volatile _Atomic(tp)*)(x) - // ------------------------------------------------------ // Atomic operations specialized for mimalloc // ------------------------------------------------------ // Atomically add a 64-bit value; returns the previous value. // Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); +static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. -static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add); // Atomically "and" a value; returns the previous value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); @@ -42,7 +40,6 @@ static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t // Atomically "or" a value; returns the previous value. Memory ordering is relaxed. static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); - // Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. // (Note: expected and desired are in opposite order from atomic_compare_exchange) @@ -69,57 +66,57 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); static inline void mi_atomic_yield(void); - -// Atomically add a value; returns the previous value. -static inline uintptr_t mi_atomic_addu(volatile _Atomic(uintptr_t)* p, uintptr_t add) { - return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, (intptr_t)add); -} // Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_subu(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { - return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, -((intptr_t)sub)); +static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { + return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); } // Atomically increment a value; returns the incremented result. 
static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_addu(p, 1); + return mi_atomic_add(p, 1); } // Atomically decrement a value; returns the decremented result. static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_subu(p, 1); + return mi_atomic_sub(p, 1); } -// Atomically read a pointer; Memory order is relaxed. -static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { - return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add); } +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p,-sub); +} + +// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). +#define mi_atomic_read_ptr_relaxed(T,p) \ + (T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p))) + // Atomically read a pointer; Memory order is acquire. -static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) { - return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p); -} +#define mi_atomic_read_ptr(T,p) \ + (T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p))) -// Atomically write a pointer -static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { - mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); -} +// Atomically write a pointer; Memory order is acquire. +#define mi_atomic_write_ptr(T,p,x) \ + mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) // Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. +// Memory order is release. (like a write) // (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_ptr_weak(volatile _Atomic(void*)* p, void* desired, void* expected) { - return mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); -} - -// Atomically compare and exchange a pointer; returns `true` if successful. +#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \ + mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) + +// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release. // (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_ptr_strong(volatile _Atomic(void*)* p, void* desired, void* expected) { - return mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); -} +#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \ + mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) // Atomically exchange a pointer value. 
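// Illustrative aside (not part of this patch): with the typed macros above, call sites
// drop their explicit casts. For example, a pointer read that used to be written as
//   mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned));
// becomes
//   mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned);
// as can be seen in the `src/segment.c` changes further below.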
-static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) { - return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange); -} +#define mi_atomic_exchange_ptr(T,p,exchange) \ + (T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) #ifdef _MSC_VER @@ -133,8 +130,8 @@ typedef LONG64 msc_intptr_t; typedef LONG msc_intptr_t; #define MI_64(f) f #endif -static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); @@ -155,17 +152,21 @@ static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { return *p; } static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { - return mi_atomic_read(p); + return *p; } static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { + #if defined(_M_IX86) || defined(_M_X64) + *p = x; + #else mi_atomic_exchange(p,x); + #endif } static inline void mi_atomic_yield(void) { YieldProcessor(); } -static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { +static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) { #ifdef _WIN64 - mi_atomic_add(p,add); + mi_atomic_addi(p,add); #else int64_t current; int64_t sum; @@ -182,11 +183,11 @@ static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) { #else #define MI_USING_STD #endif -static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) { +static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) { MI_USING_STD atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } -static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) { +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { MI_USING_STD return atomic_fetch_add_explicit(p, add, memory_order_relaxed); } diff --git a/src/alloc.c b/src/alloc.c index 20339204..847c1830 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -239,9 +239,9 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) mi_block_t* dfree; do { - dfree = (mi_block_t*)heap->thread_delayed_free; + dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } // and reset the MI_DELAYED_FREEING flag diff --git a/src/arena.c b/src/arena.c index f20a03e9..fde28685 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,7 +55,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // A memory arena descriptor typedef struct mi_arena_s { - uint8_t* start; // the start of the memory area + _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node @@ -173,7 +173,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -185,7 +185,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } // try from another numa node instead.. for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i])); + mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! 
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages @@ -226,7 +226,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { size_t bitmap_idx; mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx])); + mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -252,15 +252,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) { static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); - mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0); + mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - uintptr_t i = mi_atomic_addu(&mi_arena_count,1); + uintptr_t i = mi_atomic_increment(&mi_arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_subu(&mi_arena_count, 1); + mi_atomic_decrement(&mi_arena_count); return false; } - mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena); + mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena); return true; } diff --git a/src/memory.c b/src/memory.c index 9603a26f..a442a35d 100644 --- a/src/memory.c +++ b/src/memory.c @@ -125,7 +125,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; size_t count = mi_atomic_read_relaxed(®ions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(®ions[i].start); + uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t,®ions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; @@ -133,9 +133,9 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - void* start = mi_atomic_read_ptr(®ion->start); + uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start); mi_assert_internal(start != NULL); - return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE)); + return (start + (bit_idx * MI_SEGMENT_SIZE)); } static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { @@ -200,7 +200,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mi_atomic_write(&r->reset, 0); *bit_idx = 0; mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_write_ptr(&r->start, start); + mi_atomic_write_ptr(uint8_t*,&r->start, start); // and share it mi_region_info_t info; @@ -277,14 +277,14 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo mi_region_info_t info; info.value = mi_atomic_read(®ion->info); - void* start = mi_atomic_read_ptr(®ion->start); + uint8_t* start = mi_atomic_read_ptr(uint8_t,®ion->start); mi_assert_internal(!(info.x.is_large && !*is_large)); mi_assert_internal(start != NULL); *is_zero = mi_bitmap_unclaim(®ion->dirty, 1, blocks, bit_idx); *is_large = info.x.is_large; *memid = mi_memid_create(region, bit_idx); - void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); + void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); // commit if (*commit) { @@ -446,7 +446,7 @@ void 
_mi_mem_collect(mi_os_tld_t* tld) { } while(m == 0 && !mi_atomic_cas_weak(®ion->in_use, MI_BITMAP_FIELD_FULL, 0 )); if (m == 0) { // on success, free the whole region - void* start = mi_atomic_read_ptr(®ions[i].start); + uint8_t* start = mi_atomic_read_ptr(uint8_t,®ions[i].start); size_t arena_memid = mi_atomic_read_relaxed(®ions[i].arena_memid); memset(®ions[i], 0, sizeof(mem_region_t)); // and release the whole region diff --git a/src/options.c b/src/options.c index b06cbdb4..76cdbef0 100644 --- a/src/options.c +++ b/src/options.c @@ -169,7 +169,7 @@ static void mi_out_buf(const char* msg, void* arg) { size_t n = strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_addu(&out_len, n); + uintptr_t start = mi_atomic_add(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -181,7 +181,7 @@ static void mi_out_buf(const char* msg, void* arg) { static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + size_t count = mi_atomic_add(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; @@ -212,14 +212,14 @@ static mi_output_fun* volatile mi_out_default; // = NULL static volatile _Atomic(void*) mi_out_arg; // = NULL static mi_output_fun* mi_out_get_default(void** parg) { - if (parg != NULL) { *parg = mi_atomic_read_ptr(&mi_out_arg); } + if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - mi_atomic_write_ptr(&mi_out_arg, arg); + mi_atomic_write_ptr(void,&mi_out_arg, arg); if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } @@ -328,7 +328,7 @@ static void mi_error_default(int err) { void mi_register_error(mi_error_fun* fun, void* arg) { mi_error_handler = fun; // can be NULL - mi_atomic_write_ptr(&mi_error_arg, arg); + mi_atomic_write_ptr(void,&mi_error_arg, arg); } void _mi_error_message(int err, const char* fmt, ...) { @@ -339,7 +339,7 @@ void _mi_error_message(int err, const char* fmt, ...) { va_end(args); // and call the error handler which may abort (or return normally) if (mi_error_handler != NULL) { - mi_error_handler(err, mi_atomic_read_ptr(&mi_error_arg)); + mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg)); } else { mi_error_default(err); diff --git a/src/os.c b/src/os.c index be507b69..6e8c12d8 100644 --- a/src/os.c +++ b/src/os.c @@ -397,20 +397,20 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. 
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static volatile _Atomic(intptr_t) aligned_base; +static volatile _Atomic(uintptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - intptr_t hint = mi_atomic_add(&aligned_base, size); + uintptr_t hint = mi_atomic_add(&aligned_base, size); if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) - intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area + uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif - mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); + mi_atomic_cas_strong(&aligned_base, init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; diff --git a/src/page.c b/src/page.c index 7840a590..5ac5d9a6 100644 --- a/src/page.c +++ b/src/page.c @@ -278,11 +278,11 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ void _mi_heap_delayed_free(mi_heap_t* heap) { - // take over the list + // take over the list (note: no atomic exchange is it is often NULL) mi_block_t* block; do { - block = (mi_block_t*)heap->thread_delayed_free; - } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), NULL, block)); + block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); + } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block)); // and free them all while(block != NULL) { @@ -293,9 +293,9 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // reset the delayed_freeing flag; in that case delay it further by reinserting. 
mi_block_t* dfree; do { - dfree = (mi_block_t*)heap->thread_delayed_free; + dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } block = next; } @@ -728,14 +728,14 @@ void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(&deferred_arg)); + deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg)); heap->tld->recurse = false; } } void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; - mi_atomic_write_ptr(&deferred_arg, arg); + mi_atomic_write_ptr(void,&deferred_arg, arg); } diff --git a/src/segment.c b/src/segment.c index ea030d7a..a76871d0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -822,7 +822,7 @@ static void mi_segments_prepend_abandoned(mi_segment_t* first) { if (first == NULL) return; // first try if the abandoned list happens to be NULL - if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return; + if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; // if not, find the end of the list mi_segment_t* last = first; @@ -833,9 +833,9 @@ static void mi_segments_prepend_abandoned(mi_segment_t* first) { // and atomically prepend mi_segment_t* next; do { - next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); + next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); last->abandoned_next = next; - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next)); + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next)); } static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { @@ -877,9 +877,9 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { // To avoid the A-B-A problem, grab the entire list atomically - mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations + mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations if (segment == NULL) return false; - segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL); + segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, NULL); if (segment == NULL) return false; // we got a non-empty list diff --git a/src/stats.c b/src/stats.c index 57599821..a1404502 100644 --- a/src/stats.c +++ b/src/stats.c @@ -26,13 +26,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - mi_atomic_add64(&stat->current,amount); + mi_atomic_addi64(&stat->current,amount); if (stat->current > stat->peak) stat->peak = stat->current; // racing.. 
it's ok if (amount > 0) { - mi_atomic_add64(&stat->allocated,amount); + mi_atomic_addi64(&stat->allocated,amount); } else { - mi_atomic_add64(&stat->freed, -amount); + mi_atomic_addi64(&stat->freed, -amount); } } else { @@ -50,8 +50,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_add64( &stat->count, 1 ); - mi_atomic_add64( &stat->total, (int64_t)amount ); + mi_atomic_addi64( &stat->count, 1 ); + mi_atomic_addi64( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -70,17 +70,17 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add64( &stat->allocated, src->allocated * unit); - mi_atomic_add64( &stat->current, src->current * unit); - mi_atomic_add64( &stat->freed, src->freed * unit); + mi_atomic_addi64( &stat->allocated, src->allocated * unit); + mi_atomic_addi64( &stat->current, src->current * unit); + mi_atomic_addi64( &stat->freed, src->freed * unit); // peak scores do not work across threads.. - mi_atomic_add64( &stat->peak, src->peak * unit); + mi_atomic_addi64( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add64( &stat->total, src->total * unit); - mi_atomic_add64( &stat->count, src->count * unit); + mi_atomic_addi64( &stat->total, src->total * unit); + mi_atomic_addi64( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge From 0193a15f7e602ae081dd97f1d5f099dd4e05266a Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 19:46:43 -0800 Subject: [PATCH 148/179] nicer message on huge OS page reservation --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index fde28685..acb92243 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,7 +282,7 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages); + _mi_verbose_message("reserved %zu gb huge pages on numa node %i (of the %zu gb requested)\n", pages_reserved, numa_node, pages); size_t bcount = mi_block_count_of_size(hsize); size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); From 3bbbe6c686f33040022030a81437aaf694e26e08 Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 22 Jan 2020 20:12:42 -0800 Subject: [PATCH 149/179] enable atomic yield when delayed_freeing is encountered --- src/heap.c | 2 +- src/page.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/heap.c b/src/heap.c index 12aa0840..bdd833c3 100644 --- a/src/heap.c +++ b/src/heap.c @@ -147,7 +147,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL ); + mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches if (collect >= FORCE) { diff --git a/src/page.c b/src/page.c index 5ac5d9a6..fb75b826 100644 
--- a/src/page.c +++ b/src/page.c @@ -130,7 +130,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { - // mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { From 66818bf632fb3197019951f9028d38c3e9da44f6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 09:57:55 -0800 Subject: [PATCH 150/179] use atomic yield on delayed-freeing; clarify code --- src/heap.c | 46 +++++++++++++++++++++++----------------------- src/page.c | 7 ++++--- src/segment.c | 20 ++++++++++---------- test/test-stress.c | 4 ++-- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/heap.c b/src/heap.c index bdd833c3..2a4f98af 100644 --- a/src/heap.c +++ b/src/heap.c @@ -76,9 +76,9 @@ static bool mi_heap_is_valid(mi_heap_t* heap) { ----------------------------------------------------------- */ typedef enum mi_collect_e { - NORMAL, - FORCE, - ABANDON + MI_NORMAL, + MI_FORCE, + MI_ABANDON } mi_collect_t; @@ -87,12 +87,13 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t UNUSED(heap); mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); - _mi_page_free_collect(page, collect >= ABANDON); + _mi_page_free_collect(page, collect >= MI_FORCE); if (mi_page_all_free(page)) { - // no more used blocks, free the page. TODO: should we retire here and be less aggressive? - _mi_page_free(page, pq, collect != NORMAL); + // no more used blocks, free the page. + // note: this will free retired pages as well. + _mi_page_free(page, pq, collect >= MI_FORCE); } - else if (collect == ABANDON) { + else if (collect == MI_ABANDON) { // still used blocks but the thread is done; abandon the page _mi_page_abandon(page, pq); } @@ -111,61 +112,60 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (!mi_heap_is_initialized(heap)) return; - _mi_deferred_free(heap, collect > NORMAL); + _mi_deferred_free(heap, collect >= MI_FORCE); // collect (some) abandoned pages - if (collect >= NORMAL && !heap->no_reclaim) { - if (collect == NORMAL) { + if (collect >= MI_NORMAL && !heap->no_reclaim) { + if (collect == MI_NORMAL) { // this may free some segments (but also take ownership of abandoned pages) _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); } else if ( #ifdef NDEBUG - collect == FORCE + collect == MI_FORCE #else - collect >= FORCE + collect >= MI_FORCE #endif && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - // the main thread is abandoned, try to free all abandoned segments. + // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } } // if abandoning, mark all pages to no longer add to delayed_free - if (collect == ABANDON) { - //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) { - // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE - //} + if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } // free thread delayed blocks. - // (if abandoning, after this there are no more local references into the pages.) + // (if abandoning, after this there are no more thread-delayed references into the pages.) _mi_heap_delayed_free(heap); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); + mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches - if (collect >= FORCE) { + if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + #ifndef NDEBUG // collect regions - if (collect >= FORCE && _mi_is_main_thread()) { + if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_mem_collect(&heap->tld->os); } + #endif } void _mi_heap_collect_abandon(mi_heap_t* heap) { - mi_heap_collect_ex(heap, ABANDON); + mi_heap_collect_ex(heap, MI_ABANDON); } void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { - mi_heap_collect_ex(heap, (force ? FORCE : NORMAL)); + mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL)); } void mi_collect(bool force) mi_attr_noexcept { diff --git a/src/page.c b/src/page.c index fb75b826..149926e8 100644 --- a/src/page.c +++ b/src/page.c @@ -126,12 +126,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_thread_free_t tfreex; mi_delayed_t old_delay; do { - tfree = mi_atomic_read(&page->xthread_free); + tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
- tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail + // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal @@ -139,7 +139,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { break; // leave never-delayed flag set } - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } /* ----------------------------------------------------------- diff --git a/src/segment.c b/src/segment.c index a76871d0..85e8817b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -824,18 +824,18 @@ static void mi_segments_prepend_abandoned(mi_segment_t* first) { // first try if the abandoned list happens to be NULL if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; - // if not, find the end of the list + // if not, find the end of the argument list mi_segment_t* last = first; while (last->abandoned_next != NULL) { last = last->abandoned_next; } // and atomically prepend - mi_segment_t* next; + mi_segment_t* anext; do { - next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); - last->abandoned_next = next; - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next)); + anext = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); + last->abandoned_next = anext; + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, anext)); } static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { @@ -897,14 +897,14 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen atmost--; } // split the list and push back the remaining segments - mi_segment_t* next = last->abandoned_next; + mi_segment_t* anext = last->abandoned_next; last->abandoned_next = NULL; - mi_segments_prepend_abandoned(next); + mi_segments_prepend_abandoned(anext); } // reclaim all segments that we kept while(segment != NULL) { - mi_segment_t* const next = segment->abandoned_next; // save the next segment + mi_segment_t* const anext = segment->abandoned_next; // save the next segment // got it. 
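// The list prepend shown above (mi_segments_prepend_abandoned, with `next` renamed to
// `anext`) follows the standard recipe for pushing a whole, locally owned singly-linked
// list onto a shared lock-free stack: walk to the tail once, then CAS the shared head
// until it sticks. A self-contained sketch of that recipe with generic node types and
// names (not mimalloc's own types):

#include <stdatomic.h>
#include <stddef.h>

typedef struct ex_node_s { struct ex_node_s* next; } ex_node_t;

static void example_prepend_all(_Atomic(ex_node_t*)* head, ex_node_t* first) {
  if (first == NULL) return;
  ex_node_t* last = first;
  while (last->next != NULL) { last = last->next; }        // find the tail: O(n)
  ex_node_t* old = atomic_load_explicit(head, memory_order_relaxed);
  do {
    last->next = old;                                      // link our tail to the current head
  } while (!atomic_compare_exchange_weak(head, &old, first));
}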
mi_atomic_decrement(&abandoned_count); @@ -943,7 +943,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } } mi_assert(segment->abandoned == 0); - if (segment->used == 0) { // due to page_clear + if (segment->used == 0) { // due to page_clear's mi_segment_free(segment,false,tld); } else { @@ -954,7 +954,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } // go on - segment = next; + segment = anext; } return true; diff --git a/test/test-stress.c b/test/test-stress.c index 83f9b87b..28bd4a56 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -277,12 +277,12 @@ static void run_os_threads(size_t nthreads) { #ifdef __cplusplus #include static void* atomic_exchange_ptr(volatile void** p, void* newval) { - return std::atomic_exchange_explicit((volatile std::atomic*)p, newval, std::memory_order_acquire); + return std::atomic_exchange((volatile std::atomic*)p, newval); } #else #include static void* atomic_exchange_ptr(volatile void** p, void* newval) { - return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire); + return atomic_exchange((volatile _Atomic(void*)*)p, newval); } #endif From 0316859e0666bc7138e45789d71d2829656f85f3 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 10:03:03 -0800 Subject: [PATCH 151/179] improve codegen for mi_free --- src/alloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 847c1830..3f577f2f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { @@ -290,7 +290,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p } -static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) { +static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) { + mi_page_t* page = _mi_segment_page_of(segment, p); mi_block_t* block = (mi_page_has_aligned(page) ? 
_mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); _mi_free_block(page, local, block); } @@ -338,7 +339,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned - mi_block_t* block = (mi_block_t*)p; + mi_block_t* const block = (mi_block_t*)p; if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; @@ -349,7 +350,8 @@ void mi_free(void* p) mi_attr_noexcept } else { // non-local, aligned blocks, or a full page; use the more generic path - mi_free_generic(segment, page, tid == segment->thread_id, p); + // note: recalc page in generic to improve code generation + mi_free_generic(segment, tid == segment->thread_id, p); } } From 6fb434a99b72838f53f75899076e3cd949b9fb57 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 10:31:47 -0800 Subject: [PATCH 152/179] use -fvisibility=hidden on clang as well --- CMakeLists.txt | 3 +-- include/mimalloc-internal.h | 2 +- include/mimalloc.h | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 366ffc44..95318a0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,10 +107,9 @@ endif() # Compiler flags if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas) + list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden) if(CMAKE_C_COMPILER_ID MATCHES "GNU") list(APPEND mi_cflags -Wno-invalid-memory-model) - list(APPEND mi_cflags -fvisibility=hidden) endif() endif() diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index eaa327be..88a0f86d 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -23,7 +23,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // constant conditional due to MI_SECURE paths #define mi_decl_noinline __declspec(noinline) -#elif defined(__GNUC__) || defined(__clang__) +#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #else #define mi_decl_noinline diff --git a/include/mimalloc.h b/include/mimalloc.h index 1c77d462..7cf455e6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -43,7 +43,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) -#elif defined(__GNUC__) || defined(__clang__) +#elif defined(__GNUC__) // includes clang and icc #define mi_cdecl // leads to warnings... 
__attribute__((cdecl)) #define mi_decl_thread __thread #define mi_decl_export __attribute__((visibility("default"))) From cdc34595cfd3c26aa0d366fb70199509846b40db Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 10:52:27 -0800 Subject: [PATCH 153/179] fix warning in msvc --- ide/vs2017/mimalloc-override-test.vcxproj | 2 +- ide/vs2017/mimalloc.vcxproj | 4 ++-- ide/vs2019/mimalloc-override-test.vcxproj | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ide/vs2017/mimalloc-override-test.vcxproj b/ide/vs2017/mimalloc-override-test.vcxproj index b8e2648b..faaa00e3 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj +++ b/ide/vs2017/mimalloc-override-test.vcxproj @@ -112,7 +112,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 55f37392..e08deec4 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -111,7 +111,7 @@ - Level3 + Level4 Disabled true true @@ -165,7 +165,7 @@ - Level3 + Level4 MaxSpeed true true diff --git a/ide/vs2019/mimalloc-override-test.vcxproj b/ide/vs2019/mimalloc-override-test.vcxproj index 79adedb0..a2497a19 100644 --- a/ide/vs2019/mimalloc-override-test.vcxproj +++ b/ide/vs2019/mimalloc-override-test.vcxproj @@ -90,7 +90,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false @@ -112,7 +112,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false From c9106e74a8bd50d8da2360c19741c74ac1cd0592 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 11:06:25 -0800 Subject: [PATCH 154/179] remove __thread attribute from mimalloc.h --- include/mimalloc-internal.h | 9 ++++++--- include/mimalloc.h | 3 --- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 88a0f86d..6fca06b8 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -21,12 +21,15 @@ terms of the MIT license. A copy of the license can be found in the file #endif #if defined(_MSC_VER) -#pragma warning(disable:4127) // constant conditional due to MI_SECURE paths -#define mi_decl_noinline __declspec(noinline) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) #elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc -#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread #else #define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) #endif diff --git a/include/mimalloc.h b/include/mimalloc.h index 7cf455e6..94fcd788 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -38,14 +38,12 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_allocator __declspec(restrict) #endif #define mi_cdecl __cdecl - #define mi_decl_thread __declspec(thread) #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) #elif defined(__GNUC__) // includes clang and icc #define mi_cdecl // leads to warnings... __attribute__((cdecl)) - #define mi_decl_thread __thread #define mi_decl_export __attribute__((visibility("default"))) #define mi_decl_allocator #define mi_attr_malloc __attribute__((malloc)) @@ -64,7 +62,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #else #define mi_cdecl - #define mi_decl_thread __thread #define mi_decl_export #define mi_decl_allocator #define mi_attr_malloc From 76e727f7d1828d02c51b1c0266dca9eeb61ede2d Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 15:39:38 -0800 Subject: [PATCH 155/179] fix assertion on page destroy --- src/heap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/heap.c b/src/heap.c index 2a4f98af..ab55efae 100644 --- a/src/heap.c +++ b/src/heap.c @@ -274,6 +274,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ page->used = 0; // and free the page + // mi_page_free(page,false); + page->next = NULL; + page->prev = NULL; _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); return true; // keep going From 12701b1aac8d66ffe92bd1f80bc401d285fa32a4 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 15:48:51 -0800 Subject: [PATCH 156/179] do not reclaim segments on collect --- src/heap.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/src/heap.c b/src/heap.c index ab55efae..1f436b06 100644 --- a/src/heap.c +++ b/src/heap.c @@ -114,25 +114,20 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) if (!mi_heap_is_initialized(heap)) return; _mi_deferred_free(heap, collect >= MI_FORCE); - // collect (some) abandoned pages - if (collect >= MI_NORMAL && !heap->no_reclaim) { - if (collect == MI_NORMAL) { - // this may free some segments (but also take ownership of abandoned pages) - _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); - } - else if ( - #ifdef NDEBUG - collect == MI_FORCE - #else - collect >= MI_FORCE - #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap)) - { - // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. - // if all memory is freed by now, all segments should be freed. - _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); - } + // note: never reclaim on collect but leave it to threads that need storage to reclaim + if ( + #ifdef NDEBUG + collect == MI_FORCE + #else + collect >= MI_FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim) + { + // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. + // if all memory is freed by now, all segments should be freed. 
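// With this change, a normal collect no longer reclaims abandoned segments; only a forced
// collect on the main (backing) thread does, intended for end-of-program cleanup. As a
// usage sketch only (when and where to call this is up to the application; mi_collect is
// the public API shown earlier, the helper below is illustrative):

#include <stdbool.h>
#include <mimalloc.h>

static void example_maintenance(bool shutting_down) {
  // force==false maps to MI_NORMAL (free local pages); force==true maps to MI_FORCE,
  // which also releases segment caches and, on the main thread, tries to reclaim
  // all abandoned segments before exit
  mi_collect(shutting_down /* force */);
}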
+ _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } + // if abandoning, mark all pages to no longer add to delayed_free if (collect == MI_ABANDON) { From f8ab4bd7dc6467ae15e1f61968a46d220d94c0d5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 19:49:32 -0800 Subject: [PATCH 157/179] add leak test --- test/test-stress.c | 91 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 27 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 28bd4a56..67ec9f05 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -37,11 +37,11 @@ static size_t use_one_size = 0; // use single object size of `N * s #ifdef USE_STD_MALLOC -#define custom_malloc(s) malloc(s) +#define custom_calloc(n,s) calloc(n,s) #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) #else -#define custom_malloc(s) mi_malloc(s) +#define custom_calloc(n,s) mi_calloc(n,s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) #endif @@ -94,9 +94,12 @@ static void* alloc_items(size_t items, random_t r) { } if (items == 40) items++; // pthreads uses that size for stack increases if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); - uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); + if (items==0) items = 1; + uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t)); if (p != NULL) { - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + for (uintptr_t i = 0; i < items; i++) { + p[i] = (items - i) ^ cookie; + } } return p; } @@ -126,7 +129,7 @@ static void stress(intptr_t tid) { void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)custom_malloc(retain * sizeof(void*)); + void** retained = (void**)custom_calloc(retain,sizeof(void*)); size_t retain_top = 0; while (allocs > 0 || retain > 0) { @@ -171,7 +174,45 @@ static void stress(intptr_t tid) { //bench_end_thread(); } -static void run_os_threads(size_t nthreads); +static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); + +static void test_stress(void) { + uintptr_t r = 43 * 43; + for (int n = 0; n < ITER; n++) { + run_os_threads(THREADS, &stress); + for (int i = 0; i < TRANSFERS; i++) { + if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers + void* p = atomic_exchange_ptr(&transfer[i], NULL); + free_items(p); + } + } + mi_collect(false); +#ifndef NDEBUG + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } +#endif + } +} + +static void leak(intptr_t tid) { + uintptr_t r = 43*tid; + void* p = alloc_items(pick(&r)%128, &r); + if (chance(10, &r)) { + intptr_t i = (pick(&r) % TRANSFERS); + void* q = atomic_exchange_ptr(&transfer[i], p); + free_items(q); + } +} + +static void test_leak(void) { + for (int n = 0; n < ITER; n++) { + run_os_threads(THREADS, &leak); + mi_collect(false); +#ifndef NDEBUG + //if ((n + 1) % 10 == 0) + { printf("- iterations left: %3d\n", ITER - (n + 1)); } +#endif + } +} int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] @@ -198,19 +239,11 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
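// The leak test added above relies on a simple hand-off: each thread swaps a freshly
// allocated block into a shared transfer slot and frees whatever was there before, so
// blocks are routinely freed by a different thread than the one that allocated them.
// A minimal sketch of that hand-off (mi_malloc/mi_free are the real API; the slot and
// helper function are illustrative placeholders):

#include <stdatomic.h>
#include <mimalloc.h>

static _Atomic(void*) example_slot;                   // one shared transfer slot

static void example_handoff(size_t size) {
  void* p = mi_malloc(size);                          // allocate on this thread
  void* q = atomic_exchange(&example_slot, p);        // publish it, take the previous block
  mi_free(q);                                         // free the other thread's block (NULL is a no-op)
}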
mi_stats_reset(); - uintptr_t r = 43 * 43; - for (int n = 0; n < ITER; n++) { - run_os_threads(THREADS); - for (int i = 0; i < TRANSFERS; i++) { - if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers - void* p = atomic_exchange_ptr(&transfer[i], NULL); - free_items(p); - } - } - mi_collect(false); -#ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } -#endif + if (true) { + test_stress(); + } + else { + test_leak(); } mi_collect(true); @@ -220,18 +253,21 @@ int main(int argc, char** argv) { } +static void (*thread_entry_fun)(intptr_t) = &stress; + #ifdef _WIN32 #include static DWORD WINAPI thread_entry(LPVOID param) { - stress((intptr_t)param); + thread_entry_fun((intptr_t)param); return 0; } -static void run_os_threads(size_t nthreads) { - DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); - HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); +static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { + thread_entry_fun = fun; + DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD)); + HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE)); for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } @@ -246,7 +282,7 @@ static void run_os_threads(size_t nthreads) { } static void* atomic_exchange_ptr(volatile void** p, void* newval) { -#if (INTPTR_MAX == UINT32_MAX) +#if (INTPTR_MAX == INT32_MAX) return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); #else return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); @@ -257,12 +293,13 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { #include static void* thread_entry(void* param) { - stress((uintptr_t)param); + thread_entry_fun((uintptr_t)param); return NULL; } -static void run_os_threads(size_t nthreads) { - pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); +static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { + thread_entry_fun = fun; + pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t) * nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { From 4a2a0c2d503ad5334555f4f86d7f0128b3676aae Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 19:50:35 -0800 Subject: [PATCH 158/179] initial abandon based on fine-grained reclamation --- include/mimalloc-internal.h | 7 +- src/heap.c | 2 +- src/memory.c | 4 +- src/page.c | 49 ++--- src/segment.c | 365 +++++++++++++++++++++++++----------- 5 files changed, 289 insertions(+), 138 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 6fca06b8..3335414a 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -75,12 +75,13 @@ bool _mi_mem_unprotect(void* addr, size_t size); void _mi_mem_collect(mi_os_tld_t* tld); // "segment.c" -mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); -void _mi_segment_thread_collect(mi_segments_tld_t* tld); 
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page +void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +void _mi_abandoned_await_readers(void); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; diff --git a/src/heap.c b/src/heap.c index 1f436b06..e76a147c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -125,7 +125,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. - _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); + _mi_abandoned_reclaim_all(heap, &heap->tld->segments); } diff --git a/src/memory.c b/src/memory.c index a442a35d..c7388054 100644 --- a/src/memory.c +++ b/src/memory.c @@ -419,6 +419,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re bool any_unreset; mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); if (any_unreset) { + _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); } } @@ -451,7 +452,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) { memset(®ions[i], 0, sizeof(mem_region_t)); // and release the whole region mi_atomic_write(®ion->info, 0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { + _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); } } diff --git a/src/page.c b/src/page.c index 149926e8..c5b86b08 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); - +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -242,32 +242,37 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { // allocate a fresh page from a segment static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); - mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); - if (page == NULL) return NULL; - mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, heap->tld); - _mi_stat_increase( &heap->tld->stats.pages, 1); - if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL - mi_assert_expensive(_mi_page_is_valid(page)); - return page; + mi_assert_internal(pq==NULL||block_size == pq->block_size); + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); + if (page == NULL) { + // this may be out-of-memory, or a page was reclaimed + if (pq!=NULL && (page = pq->first) != NULL) { + mi_assert_expensive(_mi_page_is_valid(page)); + if (!mi_page_immediate_available(page)) { + mi_page_extend_free(heap, page, heap->tld); + } + mi_assert_internal(mi_page_immediate_available(page)); + if (mi_page_immediate_available(page)) { + return page; // 
reclaimed page + } + } + return NULL; // out-of-memory + } + else { + // a fresh page was allocated, initialize it + mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); + mi_page_init(heap, page, block_size, heap->tld); + _mi_stat_increase(&heap->tld->stats.pages, 1); + if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + mi_assert_expensive(_mi_page_is_valid(page)); + return page; + } } // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - - // try to reclaim an abandoned page first - mi_page_t* page = pq->first; - if (!heap->no_reclaim && - _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) && - page != pq->first) - { - // we reclaimed, and we got lucky with a reclaimed page in our queue - page = pq->first; - if (page->free != NULL) return page; - } - // otherwise allocate the page - page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); diff --git a/src/segment.c b/src/segment.c index 85e8817b..95ae6d8b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -743,7 +743,9 @@ static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { +// clear page data; can be called on abandoned segments +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) +{ mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); @@ -773,7 +775,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg segment->used--; // add to the free page list for reuse/reset - if (segment->page_kind <= MI_PAGE_MEDIUM) { + if (allow_reset && segment->page_kind <= MI_PAGE_MEDIUM) { mi_pages_reset_add(segment, page, tld); } } @@ -786,7 +788,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_reset_delayed(tld); // mark it as free now - mi_segment_page_clear(segment, page, tld); + mi_segment_page_clear(segment, page, true, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -814,39 +816,122 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) // live blocks (reached through other threads). 
Such segments // are "abandoned" and will be reclaimed by other threads to // reuse their pages and/or free them eventually -static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; -static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments -// prepend a list of abandoned segments atomically to the global abandoned list; O(n) -static void mi_segments_prepend_abandoned(mi_segment_t* first) { - if (first == NULL) return; - // first try if the abandoned list happens to be NULL - if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return; +#define MI_TAGGED_MASK MI_SEGMENT_MASK +typedef uintptr_t mi_tagged_segment_t; - // if not, find the end of the argument list +static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { + return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); +} + +static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { + mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); + if (segment==NULL) return 0; // no need to tag NULL + uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; + return ((uintptr_t)segment | tag); +} + +static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL +static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL +static volatile _Atomic(uintptr_t) abandoned_readers; // = 0 + +static void mi_abandoned_visited_push(mi_segment_t* segment) { + mi_assert_internal(segment->thread_id == 0); + mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_internal(segment->used > 0); + mi_segment_t* anext; + do { + anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); + segment->abandoned_next = anext; + } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); +} + +static bool mi_abandoned_visited_revisit(void) { + // grab the whole visited list + mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); + if (first == NULL) return false; + + // first try to swap directly if the abandoned list happens to be NULL + mi_tagged_segment_t afirst = mi_tagged_segment(first,0); + if (mi_atomic_cas_weak(&abandoned, afirst, 0)) return true; + + // find the last element of the visited list: O(n) mi_segment_t* last = first; while (last->abandoned_next != NULL) { last = last->abandoned_next; } - // and atomically prepend - mi_segment_t* anext; + // and atomically prepend to the abandoned list + // (no need to increase the readers as we don't access the abandoned segments) + mi_tagged_segment_t anext; do { - anext = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); - last->abandoned_next = anext; - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, anext)); + anext = mi_atomic_read_relaxed(&abandoned); + last->abandoned_next = mi_tagged_segment_ptr(anext); + afirst = mi_tagged_segment(first, anext); + } while (!mi_atomic_cas_weak(&abandoned, afirst, anext)); + return true; } +static void mi_abandoned_push(mi_segment_t* segment) { + mi_assert_internal(segment->thread_id == 0); + mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_internal(segment->used > 0); + mi_tagged_segment_t ts; + mi_tagged_segment_t next; + do { + ts = mi_atomic_read_relaxed(&abandoned); + segment->abandoned_next = mi_tagged_segment_ptr(ts); + next = mi_tagged_segment(segment, ts); + } while 
(!mi_atomic_cas_weak(&abandoned, next, ts)); +} + +void _mi_abandoned_await_readers(void) { + uintptr_t n; + do { + n = mi_atomic_read(&abandoned_readers); + if (n != 0) mi_atomic_yield(); + } while (n != 0); +} + +static mi_segment_t* mi_abandoned_pop(void) { + mi_segment_t* segment; + mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + segment = mi_tagged_segment_ptr(ts); + if (segment == NULL) { + if (!mi_abandoned_visited_revisit()) return NULL; // try to swap in the visited list on NULL + } + // Do a pop. We use a reader lock to prevent + // a segment to be decommitted while a read is still pending, and a tagged + // pointer to prevent A-B-A link corruption. + mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted + mi_tagged_segment_t next = 0; + do { + ts = mi_atomic_read_relaxed(&abandoned); + segment = mi_tagged_segment_ptr(ts); + if (segment != NULL) { + next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads segment so should not be decommitted + } + } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); + mi_atomic_decrement(&abandoned_readers); // release reader lock + if (segment != NULL) { + segment->abandoned_next = NULL; + } + return segment; +} + + static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_next == NULL); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed - mi_reset_delayed(tld); - mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); + mi_reset_delayed(tld); + mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); @@ -855,8 +940,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_next = NULL; - mi_segments_prepend_abandoned(segment); // prepend one-element list - mi_atomic_increment(&abandoned_count); // keep approximate count + mi_abandoned_push(segment); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -865,107 +949,164 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(!mi_pages_reset_contains(page, tld)); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { // all pages are abandoned, abandon the entire segment - mi_segment_abandon(segment,tld); + mi_segment_abandon(segment, tld); } } -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - // To avoid the A-B-A problem, grab the entire list atomically - mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations - if (segment == NULL) return false; - segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, 
NULL); - if (segment == NULL) return false; - // we got a non-empty list - if (!try_all) { - // take at most 1/8th of the list and append the rest back to the abandoned list again - // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem) - // and probably ok since the length will tend to be not too large. - uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated) - if (atmost < 8) atmost = 8; // but at least 8 - - // find the split point - mi_segment_t* last = segment; - while (last->abandoned_next != NULL && atmost > 0) { - last = last->abandoned_next; - atmost--; - } - // split the list and push back the remaining segments - mi_segment_t* anext = last->abandoned_next; - last->abandoned_next = NULL; - mi_segments_prepend_abandoned(anext); - } - - // reclaim all segments that we kept - while(segment != NULL) { - mi_segment_t* const anext = segment->abandoned_next; // save the next segment - - // got it. - mi_atomic_decrement(&abandoned_count); - segment->thread_id = _mi_thread_id(); - segment->abandoned_next = NULL; - mi_segments_track_size((long)segment->segment_size,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment,tld)); - _mi_stat_decrease(&tld->stats->segments_abandoned,1); - - // add its abandoned pages to the current thread - mi_assert(segment->abandoned == segment->used); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { - mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); - mi_assert_internal(mi_page_not_in_queue(page, tld)); - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_assert_internal(mi_page_heap(page) == NULL); +static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) +{ + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + bool has_page = false; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + mi_assert_internal(page->next == NULL); + // ensure used count is up to date and collect potential concurrent frees + _mi_page_free_collect(page, false); + if (mi_page_all_free(page)) { + // if everything free already, clear the page directly segment->abandoned--; - mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - // set the heap again and allow delayed free again - mi_page_set_heap(page, heap); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - _mi_page_free_collect(page, false); // ensure used count is up to date - if (mi_page_all_free(page)) { - // if everything free already, clear the page directly - mi_segment_page_clear(segment,page,tld); - } - else { - // otherwise reclaim it into the heap - _mi_page_reclaim(heap,page); - } + mi_segment_page_clear(segment, page, false, tld); // no reset allowed (as the segment is still abandoned) + has_page = true; + } + else if (page->xblock_size == block_size && page->used < page->reserved) { + // a page has available free blocks of the right size + has_page = true; } } - mi_assert(segment->abandoned == 
0); - if (segment->used == 0) { // due to page_clear's - mi_segment_free(segment,false,tld); + } + return has_page; +} + +#define MI_RECLAIMED ((mi_segment_t*)1) + +static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) { + UNUSED_RELEASE(page_kind); + mi_assert_internal(page_kind == segment->page_kind); + mi_assert_internal(segment->abandoned_next == NULL); + bool right_page_reclaimed = false; + + segment->thread_id = _mi_thread_id(); + mi_segments_track_size((long)segment->segment_size, tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + _mi_stat_decrease(&tld->stats->segments_abandoned, 1); + + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + segment->abandoned--; + mi_assert(page->next == NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + // set the heap again and allow delayed free again + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + mi_assert_internal(!mi_page_all_free(page)); + // TODO: should we not collect again given that we just collected? + _mi_page_free_collect(page, false); // ensure used count is up to date + if (mi_page_all_free(page)) { + // if everything free already, clear the page directly + mi_segment_page_clear(segment, page, true, tld); // reset is ok now + } + else { + // otherwise reclaim it into the heap + _mi_page_reclaim(heap, page); + if (block_size == page->xblock_size) { + right_page_reclaimed = true; + } + } + } + } + mi_assert_internal(segment->abandoned == 0); + if (right_page_reclaimed) { + // add the segment's free pages to the free small segment queue + if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { + mi_segment_insert_in_free_queue(segment, tld); + } + // and return reclaimed: at the page allocation the page is already in the queue now + return MI_RECLAIMED; + } + else { + // otherwise return the segment as it will contain some free pages + mi_assert_internal(segment->used < segment->capacity); + return segment; + } +} + +static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) +{ + mi_segment_t* segment; + int max_tries = 8; // limit the work to bound allocation times + while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + bool has_page = mi_segment_pages_collect(segment,block_size,tld); // try to free up pages (due to concurrent frees) + if (has_page && segment->page_kind == page_kind) { + // found a free page of the right kind, or page of the right block_size with free space + return mi_segment_reclaim(segment, heap, block_size, page_kind, tld); + } + else if (segment->used==0) { + // free the segment to make it available to other threads + mi_segment_os_free(segment, segment->segment_size, tld); } else { - // add its free pages to the the current thread free small segment queue - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment,tld); - } + // push on the 
visited list so it gets not looked at too quickly again + mi_abandoned_visited_push(segment); } - - // go on - segment = anext; } - - return true; + return NULL; } +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(page_kind <= MI_PAGE_LARGE); + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); + if (segment == MI_RECLAIMED) { + return NULL; // pretend out-of-memory as the page will be in the page queue + } + else if (segment == NULL) { + return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); + } + else { + return segment; + } +} + +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* segment; + while ((segment = mi_abandoned_pop()) != NULL) { + mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, segment->page_kind, tld); + mi_assert_internal(res != NULL); + if (res != MI_RECLAIMED && res != NULL) { + mi_assert_internal(res == segment); + if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { + mi_segment_insert_in_free_queue(segment, tld); + } + } + } +} /* ----------------------------------------------------------- Small page allocation ----------------------------------------------------------- */ - static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); @@ -986,13 +1127,15 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl return mi_segment_find_free(segment, tld); } -static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); if (mi_segment_queue_is_empty(free_queue)) { // possibly allocate a fresh segment - mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld); - if (segment == NULL) return NULL; // return NULL if out-of-memory + mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); + if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) + mi_assert_internal(segment->page_kind==kind); + mi_assert_internal(segment->used < segment->capacity); mi_segment_enqueue(free_queue, segment); } mi_assert_internal(free_queue->first != NULL); @@ -1005,20 +1148,20 @@ static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, return page; } -static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); +static mi_page_t* mi_segment_small_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); } -static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); +static mi_page_t* 
mi_segment_medium_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); } /* ----------------------------------------------------------- large page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); +static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap,block_size,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); @@ -1043,16 +1186,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld Page allocation and free ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { - page = mi_segment_small_page_alloc(tld,os_tld); + page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { - page = mi_segment_medium_page_alloc(tld, os_tld); + page = mi_segment_medium_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segment_large_page_alloc(tld, os_tld); + page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); From 58fdcbb0cd6fbe426237f334ba4a7cf8decebf35 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 21:37:14 -0800 Subject: [PATCH 159/179] fix bug in collect where has_page was not set on free pages --- src/options.c | 2 +- src/segment.c | 19 ++++++++++++++----- test/test-stress.c | 29 +++++++++++++++++++++++++---- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/src/options.c b/src/options.c index 76cdbef0..cb5d4049 100644 --- a/src/options.c +++ b/src/options.c @@ -70,7 +70,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
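// The options change above bumps the default eager_commit_delay from 0 to 1 segment; both
// it and reset_delay remain runtime-tunable. A sketch of overriding them through the public
// option API (assuming the corresponding mi_option_* names exposed in mimalloc.h; the same
// options can typically also be set via environment variables such as
// MIMALLOC_EAGER_COMMIT_DELAY):

#include <mimalloc.h>

int main(void) {
  // set before the first allocation so the new values apply to all segments
  mi_option_set(mi_option_eager_commit_delay, 2);     // do not eagerly commit the first 2 segments per thread
  mi_option_set(mi_option_reset_delay, 250);          // delay page resets by 250 milliseconds
  return 0;
}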
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose diff --git a/src/segment.c b/src/segment.c index 95ae6d8b..a4b61377 100644 --- a/src/segment.c +++ b/src/segment.c @@ -231,6 +231,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* ----------------------------------------------------------- */ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; size_t psize; @@ -330,7 +331,7 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for if (segment->mem_is_fixed) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use && !page->is_reset) { + if (!page->segment_in_use && page->is_committed && !page->is_reset) { mi_pages_reset_remove(page, tld); if (force_reset) { mi_page_reset(segment, page, 0, tld); @@ -544,8 +545,12 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->pages_reset.first == NULL); - mi_assert_internal(tld->pages_reset.last == NULL); +#if MI_DEBUG>=2 + if (!_mi_is_main_thread()) { + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); + } +#endif } @@ -979,7 +984,7 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m // if everything free already, clear the page directly segment->abandoned--; _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - mi_segment_page_clear(segment, page, false, tld); // no reset allowed (as the segment is still abandoned) + mi_segment_page_clear(segment, page, false, tld); // no (delayed) reset allowed (as the segment is still abandoned) has_page = true; } else if (page->xblock_size == block_size && page->used < page->reserved) { @@ -987,6 +992,9 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m has_page = true; } } + else { + has_page = true; + } } return has_page; } @@ -1046,7 +1054,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } else { // otherwise return the segment as it will contain some free pages - mi_assert_internal(segment->used < segment->capacity); + // (except for abandoned_reclaim_all which uses a block_size of zero) + mi_assert_internal(segment->used < segment->capacity || block_size == 0); return segment; } } diff --git a/test/test-stress.c b/test/test-stress.c index 67ec9f05..7869cc8c 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -57,6 +57,7 @@ const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; const uintptr_t cookie = 0x1ce4e5b9UL; #endif +static uintptr_t ticks(void); static void* atomic_exchange_ptr(volatile void** p, void* newval); typedef uintptr_t* random_t; @@ -121,7 +122,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = tid * 43; + uintptr_t r = (tid * 43)^ticks(); const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more @@ -194,9 +195,9 @@ static void test_stress(void) { } static void leak(intptr_t tid) { - uintptr_t r = 
43*tid; + uintptr_t r = (43*tid)^ticks(); void* p = alloc_items(pick(&r)%128, &r); - if (chance(10, &r)) { + if (chance(50, &r)) { intptr_t i = (pick(&r) % TRANSFERS); void* q = atomic_exchange_ptr(&transfer[i], p); free_items(q); @@ -259,7 +260,13 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #include -static DWORD WINAPI thread_entry(LPVOID param) { +static uintptr_t ticks(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return (uintptr_t)t.QuadPart; +} + +static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); return 0; } @@ -323,4 +330,18 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { } #endif +#include +#ifdef CLOCK_REALTIME +uintptr_t ticks(void) { + struct timespec t; + clock_gettime(CLOCK_REALTIME, &t); + return (uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); +} +#else +// low resolution timer +uintptr_t _mi_clock_now(void) { + return ((uintptr_t)clock() / ((uintptr_t)CLOCKS_PER_SEC / 1000)); +} +#endif + #endif From e68293741edb043e9e8bdbfa06896d5c187024f7 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 23 Jan 2020 21:44:32 -0800 Subject: [PATCH 160/179] fix assertion, add check for page committed before doing reset --- src/segment.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/segment.c b/src/segment.c index 85e8817b..3914d770 100644 --- a/src/segment.c +++ b/src/segment.c @@ -231,6 +231,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* ----------------------------------------------------------- */ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; size_t psize; @@ -330,7 +331,7 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for if (segment->mem_is_fixed) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use && !page->is_reset) { + if (!page->segment_in_use && page->is_committed && !page->is_reset) { mi_pages_reset_remove(page, tld); if (force_reset) { mi_page_reset(segment, page, 0, tld); @@ -544,8 +545,12 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); - mi_assert_internal(tld->pages_reset.first == NULL); - mi_assert_internal(tld->pages_reset.last == NULL); +#if MI_DEBUG>=2 + if (!_mi_is_main_thread()) { + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); + } +#endif } From 8cf4882a85f9ab64c77bc93898b71aedf27a1dbb Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 10:38:25 -0800 Subject: [PATCH 161/179] fix linux build --- test/test-stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test-stress.c b/test/test-stress.c index 6e2b20e9..40ddbd47 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -335,7 +335,7 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { uintptr_t ticks(void) { struct timespec t; clock_gettime(CLOCK_REALTIME, &t); - return (uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); + return ((uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); } #else // low resolution timer From 
28c14d99c317063fc6a869a9261f125a30106fe5 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 11:03:12 -0800 Subject: [PATCH 162/179] clean up comments --- src/memory.c | 59 +++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/memory.c b/src/memory.c index a442a35d..287de414 100644 --- a/src/memory.c +++ b/src/memory.c @@ -57,7 +57,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo // Constants #if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map +#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map #elif (MI_INTPTR_SIZE==4) #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map #else @@ -72,14 +72,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo #define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB #define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) -// Region info is a pointer to the memory region and two bits for -// its flags: is_large, and is_committed. +// Region info typedef union mi_region_info_u { - uintptr_t value; + uintptr_t value; struct { - bool valid; - bool is_large; - short numa_node; + bool valid; // initialized? + bool is_large; // allocated in fixed large/huge OS pages + short numa_node; // the associated NUMA node (where -1 means no associated node) } x; } mi_region_info_t; @@ -87,12 +86,12 @@ typedef union mi_region_info_u { // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association) - volatile _Atomic(void*) start; // start of the memory area (and flags) + volatile _Atomic(uintptr_t) info; // mi_region_info_t.value + volatile _Atomic(void*) start; // start of the memory area mi_bitmap_field_t in_use; // bit per in-use block mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed)) - mi_bitmap_field_t reset; // track reset per block + mi_bitmap_field_t commit; // track if committed per block + mi_bitmap_field_t reset; // track if reset per block volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- } mem_region_t; @@ -239,11 +238,13 @@ static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, { // try all regions for a free slot const size_t count = mi_atomic_read(®ions_count); - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? + size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around mem_region_t* r = ®ions[idx]; + // if this region suits our demand (numa node matches, large OS page matches) if (mi_region_is_suitable(r, numa_node, allow_large)) { + // then try to atomically claim a segment(s) in this region if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; @@ -263,15 +264,15 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo const int numa_node = (_mi_os_numa_node_count() <= 1 ? 
-1 : _mi_os_numa_node(tld)); // try to claim in existing regions if (!mi_region_try_claim(numa_node, blocks, *is_large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region + // otherwise try to allocate a fresh region and claim in there if (!mi_region_try_alloc_os(blocks, *commit, *is_large, ®ion, &bit_idx, tld)) { // out of regions or memory return NULL; } } - - // found a region and claimed `blocks` at `bit_idx` + // ------------------------------------------------ + // found a region and claimed `blocks` at `bit_idx`, initialize them now mi_assert_internal(region != NULL); mi_assert_internal(mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); @@ -346,25 +347,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l size = _mi_align_up(size, _mi_os_page_size()); // allocate from regions if possible + void* p = NULL; size_t arena_memid; const size_t blocks = mi_region_block_count(size); if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); - mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0); - if (p != NULL) { - #if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - return p; + p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); + if (p == NULL) { + _mi_warning_message("unable to allocate from region: size %zu\n", size); } - _mi_warning_message("unable to allocate from region: size %zu\n", size); + } + if (p == NULL) { + // and otherwise fall back to the OS + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + *memid = mi_memid_create_from_arena(arena_memid); } - // and otherwise fall back to the OS - void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); - if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; } + if (p != NULL) { + mi_assert_internal((uintptr_t)p % alignment == 0); +#if (MI_DEBUG>=2) + if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed +#endif + } return p; } From 4ae51096ecdee7f1d4b309f38f6c272a8f61d473 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 15:45:03 -0800 Subject: [PATCH 163/179] add warning on region exhaustion --- src/memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/memory.c b/src/memory.c index 287de414..96047b79 100644 --- a/src/memory.c +++ b/src/memory.c @@ -92,7 +92,8 @@ typedef struct mem_region_s { mi_bitmap_field_t dirty; // track if non-zero per block mi_bitmap_field_t commit; // track if committed per block mi_bitmap_field_t reset; // track if reset per block - volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena- + volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena + uintptr_t padding; // round to 8 fields } mem_region_t; // The region map @@ -187,6 +188,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, if (idx >= MI_REGION_MAX) { mi_atomic_decrement(®ions_count); _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats); + _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); return false; } From e070eba112f80b4b4c007cc8cd6696463bf1884b Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 
Jan 2020 16:30:52 -0800 Subject: [PATCH 164/179] fix tagged null encoding, search segment cache before reclaim --- src/options.c | 2 +- src/segment.c | 149 +++++++++++++++++++++++++++++++-------------- test/test-stress.c | 16 ++--- 3 files changed, 111 insertions(+), 56 deletions(-) diff --git a/src/options.c b/src/options.c index cb5d4049..af051aa2 100644 --- a/src/options.c +++ b/src/options.c @@ -67,7 +67,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed diff --git a/src/segment.c b/src/segment.c index a4b61377..7aced87d 100644 --- a/src/segment.c +++ b/src/segment.c @@ -15,27 +15,25 @@ terms of the MIT license. A copy of the license can be found in the file static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); -/* ----------------------------------------------------------- +/* -------------------------------------------------------------------------------- Segment allocation - We allocate pages inside big OS allocated "segments" - (4mb on 64-bit). This is to avoid splitting VMA's on Linux - and reduce fragmentation on other OS's. Each thread - owns its own segments. + We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid + splitting VMA's on Linux and reduce fragmentation on other OS's. + Each thread owns its own segments. Currently we have: - small pages (64kb), 64 in one segment - medium pages (512kb), 8 in one segment - large pages (4mb), 1 in one segment - - huge blocks > MI_LARGE_OBJ_SIZE_MAX (512kb) are directly allocated by the OS + - huge blocks > MI_LARGE_OBJ_SIZE_MAX become large segment with 1 page - In any case the memory for a segment is virtual and only - committed on demand (i.e. we are careful to not touch the memory - until we actually allocate a block there) + In any case the memory for a segment is virtual and usually committed on demand. + (i.e. we are careful to not touch the memory until we actually allocate a block there) If a thread ends, it "abandons" pages with used blocks and there is an abandoned segment list whose segments can be reclaimed by still running threads, much like work-stealing. ------------------------------------------------------------ */ +-------------------------------------------------------------------------------- */ /* ----------------------------------------------------------- @@ -559,8 +557,11 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
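For intuition, the page counts in the segment layout comment above follow directly from the 4MiB segment size; a tiny stand-alone check (illustrative constants only, the real definitions live in the mimalloc headers):

  #include <assert.h>
  enum { KiB = 1024, MiB = 1024 * KiB };
  int main(void) {
    assert(64 * ( 64 * KiB) == 4 * MiB);  // 64 small pages fill one 4MiB segment
    assert( 8 * (512 * KiB) == 4 * MiB);  // 8 medium pages fill one segment
    assert( 1 * (  4 * MiB) == 4 * MiB);  // a large page spans the whole segment
    return 0;
  }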
-static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + // the segment parameter is non-null if it came from our cache + mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); + // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { @@ -587,8 +588,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, bool is_zero = false; // Try to get it from our thread local cache first - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { + // came from cache + mi_assert_internal(segment->segment_size == segment_size); if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { pages_still_good = true; } @@ -674,6 +676,9 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, return segment; } +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_init(NULL, required, page_kind, page_shift, tld, os_tld); +} static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); @@ -814,15 +819,23 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) /* ----------------------------------------------------------- - Abandonment +Abandonment + +When threads terminate, they can leave segments with +live blocks (reached through other threads). Such segments +are "abandoned" and will be reclaimed by other threads to +reuse their pages and/or free them eventually + +We maintain a global list of abandoned segments that are +reclaimed on demand. Since this is shared among threads +the implementation needs to avoid the A-B-A problem on +popping abandoned segments which is why tagged pointers are +used. ----------------------------------------------------------- */ -// When threads terminate, they can leave segments with -// live blocks (reached through other threads). Such segments -// are "abandoned" and will be reclaimed by other threads to -// reuse their pages and/or free them eventually - - +// Use the bottom 20-bits (on 64-bit) of the aligned segment +// pointers to put in a tag that increments on update to avoid +// the A-B-A problem. #define MI_TAGGED_MASK MI_SEGMENT_MASK typedef uintptr_t mi_tagged_segment_t; @@ -832,15 +845,23 @@ static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); - if (segment==NULL) return 0; // no need to tag NULL uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; return ((uintptr_t)segment | tag); } +// This is a list of visited abandoned pages that were full at the time. +// this list migrates to `abandoned` when that becomes NULL. static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL + +// The abandoned page list. static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL + +// We also maintain a count of current readers of the abandoned list +// in order to prevent resetting/decommitting segment memory if it might +// still be read. 
static volatile _Atomic(uintptr_t) abandoned_readers; // = 0 +// Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); mi_assert_internal(segment->abandoned_next == NULL); @@ -853,14 +874,23 @@ static void mi_abandoned_visited_push(mi_segment_t* segment) { } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); } -static bool mi_abandoned_visited_revisit(void) { +// Move the visited list to the abandoned list. +static bool mi_abandoned_visited_revisit(void) +{ + // quick check if the visited list is empty + if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false; + // grab the whole visited list mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); if (first == NULL) return false; // first try to swap directly if the abandoned list happens to be NULL - mi_tagged_segment_t afirst = mi_tagged_segment(first,0); - if (mi_atomic_cas_weak(&abandoned, afirst, 0)) return true; + const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t afirst; + if (mi_tagged_segment_ptr(ts)==NULL) { + afirst = mi_tagged_segment(first, ts); + if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true; + } // find the last element of the visited list: O(n) mi_segment_t* last = first; @@ -879,6 +909,7 @@ static bool mi_abandoned_visited_revisit(void) { return true; } +// Push on the abandoned list. static void mi_abandoned_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); mi_assert_internal(segment->abandoned_next == NULL); @@ -893,6 +924,7 @@ static void mi_abandoned_push(mi_segment_t* segment) { } while (!mi_atomic_cas_weak(&abandoned, next, ts)); } +// Wait until there are no more pending reads on segments that used to be in the abandoned list void _mi_abandoned_await_readers(void) { uintptr_t n; do { @@ -901,23 +933,28 @@ void _mi_abandoned_await_readers(void) { } while (n != 0); } +// Pop from the abandoned list static mi_segment_t* mi_abandoned_pop(void) { mi_segment_t* segment; + // Check efficiently if it is empty (or if the visited list needs to be moved) mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); - if (segment == NULL) { - if (!mi_abandoned_visited_revisit()) return NULL; // try to swap in the visited list on NULL + if (mi_likely(segment == NULL)) { + if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL + return NULL; + } } - // Do a pop. We use a reader lock to prevent - // a segment to be decommitted while a read is still pending, and a tagged - // pointer to prevent A-B-A link corruption. + + // Do a pop. We use a reader count to prevent + // a segment to be decommitted while a read is still pending, + // and a tagged pointer to prevent A-B-A link corruption. 
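// As a rough sketch of the read-protection protocol (not the exact call sequence):
//   reader:  abandoned_readers++;  pop a segment;  read segment->abandoned_next;  abandoned_readers--;
//   freeing: _mi_abandoned_await_readers() spins until abandoned_readers == 0,
//            and only then is the segment memory reset/decommitted.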
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; do { ts = mi_atomic_read_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { - next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads segment so should not be decommitted + next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); mi_atomic_decrement(&abandoned_readers); // release reader lock @@ -927,6 +964,9 @@ static mi_segment_t* mi_abandoned_pop(void) { return segment; } +/* ----------------------------------------------------------- + Abandon segment/page +----------------------------------------------------------- */ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); @@ -945,7 +985,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_next = NULL; - mi_abandoned_push(segment); + mi_abandoned_push(segment); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -964,6 +1004,9 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { } } +/* ----------------------------------------------------------- + Reclaim abandoned pages +----------------------------------------------------------- */ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) { @@ -1082,22 +1125,6 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, return NULL; } -static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) -{ - mi_assert_internal(page_kind <= MI_PAGE_LARGE); - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); - if (segment == MI_RECLAIMED) { - return NULL; // pretend out-of-memory as the page will be in the page queue - } - else if (segment == NULL) { - return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); - } - else { - return segment; - } -} - void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; while ((segment = mi_abandoned_pop()) != NULL) { @@ -1112,6 +1139,34 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { } } + +/* ----------------------------------------------------------- + Reclaim or allocate +----------------------------------------------------------- */ + +static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +{ + mi_assert_internal(page_kind <= MI_PAGE_LARGE); + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + // 1. try to get a segment from our cache + mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld); + if (segment != NULL) { + mi_segment_init(segment, 0, page_kind, page_shift, tld, os_tld); + return segment; + } + // 2. 
try to reclaim an abandoned segment + segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld); + if (segment == MI_RECLAIMED) { + return NULL; // pretend out-of-memory as the page will be in the page queue of the heap + } + else if (segment != NULL) { + return segment; // reclaimed a segment with empty pages in it + } + // 3. otherwise allocate a fresh segment + return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); +} + + /* ----------------------------------------------------------- Small page allocation ----------------------------------------------------------- */ @@ -1192,7 +1247,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld } /* ----------------------------------------------------------- - Page allocation and free + Page allocation ----------------------------------------------------------- */ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { diff --git a/test/test-stress.c b/test/test-stress.c index 40ddbd47..72e4e853 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -32,8 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor +#define STRESS // undefine for leak test + static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? +static size_t use_one_size = 1; // use single object size of `N * sizeof(uintptr_t)`? #ifdef USE_STD_MALLOC @@ -189,7 +191,7 @@ static void test_stress(void) { } mi_collect(false); #ifndef NDEBUG - if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } } @@ -209,8 +211,7 @@ static void test_leak(void) { run_os_threads(THREADS, &leak); mi_collect(false); #ifndef NDEBUG - //if ((n + 1) % 10 == 0) - { printf("- iterations left: %3d\n", ITER - (n + 1)); } + if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } } @@ -240,12 +241,11 @@ int main(int argc, char** argv) { // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - if (true) { +#ifdef STRESS test_stress(); - } - else { +#else test_leak(); - } +#endif mi_collect(true); mi_stats_print(NULL); From b31bc52618658bdb12cb316c13580ecd82bfd8d9 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 19:02:13 -0800 Subject: [PATCH 165/179] add cache alignment directives for contended variables --- ide/vs2019/mimalloc.vcxproj | 4 ++-- include/mimalloc-internal.h | 4 ++++ src/arena.c | 6 +++--- src/os.c | 4 ++-- src/segment.c | 26 +++++++++++++------------- test/test-stress.c | 6 +++--- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 037e380d..a98e78ba 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -100,7 +100,7 @@ MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - stdcpp17 + Default @@ -119,7 +119,7 @@ MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - stdcpp17 + Default diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3335414a..902d2fdf 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -20,16 +20,20 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_trace_message(...) #endif +#define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) #elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align #endif diff --git a/src/arena.c b/src/arena.c index acb92243..ac599f32 100644 --- a/src/arena.c +++ b/src/arena.c @@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor -typedef struct mi_arena_s { +typedef mi_decl_cache_align struct mi_arena_s { _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) @@ -70,8 +70,8 @@ typedef struct mi_arena_s { // The available arenas -static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static _Atomic(uintptr_t) mi_arena_count; // = 0 +static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; +static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0 /* ----------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 6e8c12d8..b8dfaa70 100644 --- a/src/os.c +++ b/src/os.c @@ -397,7 +397,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static volatile _Atomic(uintptr_t) aligned_base; +static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { @@ -905,7 +905,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages -static _Atomic(uintptr_t) mi_huge_start; // = 0 +static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0 // Claim an aligned address range for huge pages static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { diff --git a/src/segment.c b/src/segment.c index 7aced87d..a26ac449 100644 --- a/src/segment.c +++ b/src/segment.c @@ -365,9 +365,6 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) { } - - - /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ @@ -829,13 +826,15 @@ reuse their pages and/or free them eventually We maintain a global list of abandoned segments that are reclaimed on demand. Since this is shared among threads the implementation needs to avoid the A-B-A problem on -popping abandoned segments which is why tagged pointers are -used. 
+popping abandoned segments: +We use tagged pointers to avoid accidentially identifying +reused segments, much like stamped references in Java. +Secondly, we maintain a reader counter to avoid resetting +or decommitting segments that have a pending read operation. ----------------------------------------------------------- */ -// Use the bottom 20-bits (on 64-bit) of the aligned segment -// pointers to put in a tag that increments on update to avoid -// the A-B-A problem. +// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers +// to put in a tag that increments on update to avoid the A-B-A problem. #define MI_TAGGED_MASK MI_SEGMENT_MASK typedef uintptr_t mi_tagged_segment_t; @@ -850,16 +849,17 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se } // This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. -static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL +// this list migrates to `abandoned` when that becomes NULL. The use of +// this list reduces contention and the rate at which segments are visited. +static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL -// The abandoned page list. -static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL +// The abandoned page list (tagged as it supports pop) +static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL // We also maintain a count of current readers of the abandoned list // in order to prevent resetting/decommitting segment memory if it might // still be read. -static volatile _Atomic(uintptr_t) abandoned_readers; // = 0 +static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0 // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { diff --git a/test/test-stress.c b/test/test-stress.c index 72e4e853..19f10360 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -32,10 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor -#define STRESS // undefine for leak test +// #define STRESS // undefine for leak test static bool allow_large_objects = true; // allow very large objects? -static size_t use_one_size = 1; // use single object size of `N * sizeof(uintptr_t)`? +static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? #ifdef USE_STD_MALLOC @@ -198,7 +198,7 @@ static void test_stress(void) { static void leak(intptr_t tid) { uintptr_t r = (43*tid)^ticks(); - void* p = alloc_items(pick(&r)%128, &r); + void* p = alloc_items(1 /*pick(&r)%128*/, &r); if (chance(50, &r)) { intptr_t i = (pick(&r) % TRANSFERS); void* q = atomic_exchange_ptr(&transfer[i], p); From 47300eeda3e78a909492f67f7c2b77289a7be383 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 20:17:33 -0800 Subject: [PATCH 166/179] avoid memset --- src/init.c | 7 ++++--- src/segment.c | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/init.c b/src/init.c index 18a18f60..366acbf5 100644 --- a/src/init.c +++ b/src/init.c @@ -140,6 +140,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; Initialization and freeing of the thread local heaps ----------------------------------------------------------- */ +// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). 
typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; @@ -154,12 +155,13 @@ static bool _mi_heap_init(void) { mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); } else { - // use `_mi_os_alloc` to allocate directly from the OS + // use `_mi_os_alloc` to allocate directly from the OS mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? if (td == NULL) { _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); return false; } + // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); @@ -168,8 +170,7 @@ static bool _mi_heap_init(void) { heap->cookie = _mi_heap_random_next(heap) | 1; heap->key[0] = _mi_heap_random_next(heap); heap->key[1] = _mi_heap_random_next(heap); - heap->tld = tld; - memset(tld, 0, sizeof(*tld)); + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; diff --git a/src/segment.c b/src/segment.c index a26ac449..f6554520 100644 --- a/src/segment.c +++ b/src/segment.c @@ -948,6 +948,7 @@ static mi_segment_t* mi_abandoned_pop(void) { // Do a pop. We use a reader count to prevent // a segment to be decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption. + // (this is called from `memory.c:_mi_mem_free` for example) mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; do { From ecece572847f70553f2a2c8f9d754e1f16756986 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 24 Jan 2020 20:20:43 -0800 Subject: [PATCH 167/179] fix bug in committed check in arena allocation --- src/arena.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index acb92243..55747bb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -107,6 +107,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* size_t idx = mi_atomic_read(&arena->search_idx); // start from last search for (size_t visited = 0; visited < fcount; visited++, idx++) { if (idx >= fcount) idx = 0; // wrap around + // try to atomically claim a range of bits if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { mi_atomic_write(&arena->search_idx, idx); // start search from here next time return true; @@ -135,8 +136,8 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n // always committed *commit = true; } - else if (commit) { - // ensure commit now + else if (*commit) { + // arena not committed as a whole, but commit requested: ensure commit now bool any_uncommitted; mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { From 2b667bd3aef92ebda22a660e068798ce31b6eed4 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 25 Jan 2020 14:47:09 +0000 Subject: [PATCH 168/179] enable arc4random abi under apple --- src/random.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/random.c b/src/random.c index c40a96da..6fef2434 100644 --- a/src/random.c +++ b/src/random.c @@ -176,7 +176,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { return true; } */ -#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ +#elif defined(ANDROID) || defined(XP_DARWIN) || 
defined(__APPLE__) || defined(__DragonFly__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__wasi__) #include @@ -325,4 +325,4 @@ static void chacha_test(void) chacha_block(&r); mi_assert_internal(array_equals(r.output, r_out, 16)); } -*/ \ No newline at end of file +*/ From 5e32d00aab55449acfd2658256a7d6ddb1d1f446 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 12:26:08 -0800 Subject: [PATCH 169/179] add visit count to abandoned to limit list length --- include/mimalloc-types.h | 6 +++-- src/segment.c | 57 +++++++++++++++++++++++++++------------- test/test-stress.c | 7 ++--- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 0c6dc666..48d86a25 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -241,10 +241,12 @@ typedef struct mi_segment_s { bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields - struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` + struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` struct mi_segment_s* prev; struct mi_segment_s* abandoned_next; - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` diff --git a/src/segment.c b/src/segment.c index f6554520..715d632a 100644 --- a/src/segment.c +++ b/src/segment.c @@ -831,6 +831,14 @@ We use tagged pointers to avoid accidentially identifying reused segments, much like stamped references in Java. Secondly, we maintain a reader counter to avoid resetting or decommitting segments that have a pending read operation. + +Note: the current implementation is one possible design; +another way might be to keep track of abandoned segments +in the regions. This would have the advantage of keeping +all concurrent code in one place and not needing to deal +with ABA issues. The drawback is that it is unclear how to +scan abandoned segments efficiently in that case as they +would be spread among all other segments in the regions. 
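As a minimal sketch (generic pseudo-code, not the exact implementation below;
`ptr_of`, `tag_of` and `pack` stand in for the mask operations used here), the
tagged pop has this shape, bumping the tag on every update so that a stale
pointer/tag pair from another thread can never win the CAS:

   do {
     ts      = atomic_load(&abandoned);              // packed {segment, tag}
     segment = ptr_of(ts);
     if (segment == NULL) break;
     next    = pack(segment->abandoned_next, tag_of(ts) + 1);
   } while (!atomic_compare_exchange_weak(&abandoned, &ts, next));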
----------------------------------------------------------- */ // Use the bottom 20-bits (on 64-bit) of the aligned segment pointers @@ -986,6 +994,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_next = NULL; + segment->abandoned_visits = 0; mi_abandoned_push(segment); } @@ -1009,6 +1018,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { Reclaim abandoned pages ----------------------------------------------------------- */ +// Possibly clear pages and check if free space is available static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); @@ -1045,13 +1055,13 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m #define MI_RECLAIMED ((mi_segment_t*)1) -static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) { - UNUSED_RELEASE(page_kind); - mi_assert_internal(page_kind == segment->page_kind); +// Reclaim a segment +static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) { mi_assert_internal(segment->abandoned_next == NULL); bool right_page_reclaimed = false; segment->thread_id = _mi_thread_id(); + segment->abandoned_visits = 0; mi_segments_track_size((long)segment->segment_size, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); @@ -1104,20 +1114,45 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } +// Reclaim a segment without returning it +static void mi_segment_reclaim_force(mi_segment_t* segment, mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, tld); + mi_assert_internal(res != MI_RECLAIMED); // due to block_size == 0 + if (res!=MI_RECLAIMED && res != NULL) { + mi_assert_internal(res == segment); + if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { + mi_segment_insert_in_free_queue(res, tld); + } + } +} + +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { + mi_segment_t* segment; + while ((segment = mi_abandoned_pop()) != NULL) { + mi_segment_reclaim_force(segment, heap, tld); + } +} + + static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld) { mi_segment_t* segment; int max_tries = 8; // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + segment->abandoned_visits++; bool has_page = mi_segment_pages_collect(segment,block_size,tld); // try to free up pages (due to concurrent frees) if (has_page && segment->page_kind == page_kind) { // found a free page of the right kind, or page of the right block_size with free space - return mi_segment_reclaim(segment, heap, block_size, page_kind, tld); + return mi_segment_reclaim(segment, heap, block_size, tld); } else if (segment->used==0) { // free the segment to make it available to other threads mi_segment_os_free(segment, segment->segment_size, tld); } + else if (segment->abandoned_visits >= 3) { + // always reclaim on 3rd visit to limit the list length + mi_segment_reclaim_force(segment, heap, tld); + } else { // push on the visited list so it gets not 
looked at too quickly again mi_abandoned_visited_push(segment); @@ -1126,20 +1161,6 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, return NULL; } -void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { - mi_segment_t* segment; - while ((segment = mi_abandoned_pop()) != NULL) { - mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, segment->page_kind, tld); - mi_assert_internal(res != NULL); - if (res != MI_RECLAIMED && res != NULL) { - mi_assert_internal(res == segment); - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment, tld); - } - } - } -} - /* ----------------------------------------------------------- Reclaim or allocate diff --git a/test/test-stress.c b/test/test-stress.c index 19f10360..ab4571db 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -32,7 +32,7 @@ static int ITER = 50; // N full iterations destructing and re-creating a // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor -// #define STRESS // undefine for leak test +#define STRESS // undefine for leak test static bool allow_large_objects = true; // allow very large objects? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`? @@ -124,7 +124,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = (tid * 43); // ^ ticks(); + uintptr_t r = (tid * 43); // rand(); const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more @@ -180,7 +180,8 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { - uintptr_t r = 43 * 43; + srand(0x7feb352d); + uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { run_os_threads(THREADS, &stress); for (int i = 0; i < TRANSFERS; i++) { From f4630d43a71409f1963b910ffb247e137c42d85c Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 12:49:14 -0800 Subject: [PATCH 170/179] allow reset on large pages; check commit status before reset --- src/segment.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/segment.c b/src/segment.c index 715d632a..2d2263ea 100644 --- a/src/segment.c +++ b/src/segment.c @@ -231,7 +231,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; size_t psize; void* start = mi_segment_raw_page_start(segment, page, &psize); page->is_reset = true; @@ -281,12 +281,12 @@ static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { } static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(!page->segment_in_use); + mi_assert_internal(!page->segment_in_use || !page->is_committed); mi_assert_internal(mi_page_not_in_queue(page,tld)); mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_internal(_mi_page_segment(page)==segment); if 
(!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return; + if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; if (mi_option_get(mi_option_reset_delay) == 0) { // reset immediately? @@ -782,7 +782,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a segment->used--; // add to the free page list for reuse/reset - if (allow_reset && segment->page_kind <= MI_PAGE_MEDIUM) { + if (allow_reset) { mi_pages_reset_add(segment, page, tld); } } @@ -1095,7 +1095,10 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, right_page_reclaimed = true; } } - } + } + else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet + mi_pages_reset_add(segment, page, tld); + } } mi_assert_internal(segment->abandoned == 0); if (right_page_reclaimed) { From 19a0d9dfa0f1ed1145d6943d971511b2a2d1060d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 12:51:56 -0800 Subject: [PATCH 171/179] clean up stress test --- test/test-stress.c | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index ab4571db..1b559a59 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -59,7 +59,6 @@ const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; const uintptr_t cookie = 0x1ce4e5b9UL; #endif -static uintptr_t ticks(void); static void* atomic_exchange_ptr(volatile void** p, void* newval); typedef uintptr_t* random_t; @@ -180,7 +179,6 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid)); static void test_stress(void) { - srand(0x7feb352d); uintptr_t r = rand(); for (int n = 0; n < ITER; n++) { run_os_threads(THREADS, &stress); @@ -197,8 +195,9 @@ static void test_stress(void) { } } +#ifndef STRESS static void leak(intptr_t tid) { - uintptr_t r = (43*tid)^ticks(); + uintptr_t r = rand(); void* p = alloc_items(1 /*pick(&r)%128*/, &r); if (chance(50, &r)) { intptr_t i = (pick(&r) % TRANSFERS); @@ -207,7 +206,7 @@ static void leak(intptr_t tid) { } } -static void test_leak(void) { +static void test_leak(void) { for (int n = 0; n < ITER; n++) { run_os_threads(THREADS, &leak); mi_collect(false); @@ -216,6 +215,7 @@ static void test_leak(void) { #endif } } +#endif int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] @@ -241,6 +241,7 @@ int main(int argc, char** argv) { //bench_start_program(); // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. 
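// (The "half survive" hand-off works by swapping each freshly allocated block
//  into a random slot of the shared `transfer` array and freeing whatever was
//  there before; roughly, per thread:
//      void* q = atomic_exchange_ptr(&transfer[pick(&r) % TRANSFERS], p);
//      free_items(q);
//  so a block lives until some later round happens to pick the same slot.)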
+ srand(0x7feb352d); mi_stats_reset(); #ifdef STRESS test_stress(); @@ -261,12 +262,6 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #include -static uintptr_t ticks(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return (uintptr_t)t.QuadPart; -} - static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); return 0; @@ -331,18 +326,4 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) { } #endif -#include -#ifdef CLOCK_REALTIME -uintptr_t ticks(void) { - struct timespec t; - clock_gettime(CLOCK_REALTIME, &t); - return ((uintptr_t)t.tv_sec * 1000) + ((uintptr_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -uintptr_t _mi_clock_now(void) { - return ((uintptr_t)clock() / ((uintptr_t)CLOCKS_PER_SEC / 1000)); -} -#endif - #endif From 62b8fb26b11f7b5e496add0cc6c9c1c9da3e0791 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:27:47 -0800 Subject: [PATCH 172/179] fix freeing of segments on forced reclaim --- src/{memory.c => region.c} | 0 src/segment.c | 5 ++++- 2 files changed, 4 insertions(+), 1 deletion(-) rename src/{memory.c => region.c} (100%) diff --git a/src/memory.c b/src/region.c similarity index 100% rename from src/memory.c rename to src/region.c diff --git a/src/segment.c b/src/segment.c index 2d2263ea..e536ae59 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1123,7 +1123,10 @@ static void mi_segment_reclaim_force(mi_segment_t* segment, mi_heap_t* heap, mi_ mi_assert_internal(res != MI_RECLAIMED); // due to block_size == 0 if (res!=MI_RECLAIMED && res != NULL) { mi_assert_internal(res == segment); - if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { + if (res->used == 0) { + mi_segment_free(segment, false, tld); + } + else if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) { mi_segment_insert_in_free_queue(res, tld); } } From 7785139201dbac8bc9515d7f5fa148f3e0c7827d Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:28:24 -0800 Subject: [PATCH 173/179] fix warning on gcc on attribute ignore in templates --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 21d0affc..7bf8099b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) // A memory arena descriptor -typedef mi_decl_cache_align struct mi_arena_s { +typedef struct mi_arena_s { _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) From 4faf412f53ac49ee04584b015c826a7bb1d67177 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:28:49 -0800 Subject: [PATCH 174/179] move 'memory.c' to 'region.c' --- CMakeLists.txt | 2 +- ide/vs2017/mimalloc-override.vcxproj | 4 ++-- ide/vs2017/mimalloc-override.vcxproj.filters | 4 ++-- ide/vs2017/mimalloc.vcxproj | 4 ++-- ide/vs2017/mimalloc.vcxproj.filters | 4 ++-- ide/vs2019/mimalloc-override.vcxproj | 4 ++-- ide/vs2019/mimalloc-override.vcxproj.filters | 4 ++-- ide/vs2019/mimalloc.vcxproj | 4 ++-- ide/vs2019/mimalloc.vcxproj.filters | 4 ++-- src/static.c | 2 +- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95318a0e..b60e64a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,7 +21,7 @@ 
set(mi_sources src/random.c src/os.c src/arena.c - src/memory.c + src/region.c src/segment.c src/page.c src/alloc.c diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 4225a2f9..26c8080b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -234,7 +234,7 @@ - + @@ -251,4 +251,4 @@ - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 037fbcbb..02652658 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -61,7 +61,7 @@ Source Files - + Source Files @@ -77,4 +77,4 @@ Source Files - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index e08deec4..9d6af0e5 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -220,7 +220,7 @@ - + true @@ -245,4 +245,4 @@ - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 5fe74aa0..43660519 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -47,7 +47,7 @@ Source Files - + Source Files @@ -80,4 +80,4 @@ Header Files - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index ac19e321..17b6f4c0 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -237,7 +237,7 @@ - + @@ -254,4 +254,4 @@ - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index a8c5a5de..83d6f7fe 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -22,7 +22,7 @@ Source Files - + Source Files @@ -78,4 +78,4 @@ {39cb7e38-69d0-43fb-8406-6a0f7cefc3b4} - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index a98e78ba..a1372204 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -223,7 +223,7 @@ - + true @@ -248,4 +248,4 @@ - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 61de4afe..4704fb2e 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -22,7 +22,7 @@ Source Files - + Source Files @@ -81,4 +81,4 @@ {852a14ae-6dde-4e95-8077-ca705e97e5af} - \ No newline at end of file + diff --git a/src/static.c b/src/static.c index 0519453e..ec9370eb 100644 --- a/src/static.c +++ b/src/static.c @@ -17,7 +17,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "random.c" #include "os.c" #include "arena.c" -#include "memory.c" +#include "region.c" #include "segment.c" #include "page.c" #include "heap.c" From 394b796ea0aec69b2f97ad51cce16ed432ca6e69 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 13:43:56 -0800 Subject: [PATCH 175/179] fix over-eager page reset in segment reclamation --- src/segment.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/segment.c b/src/segment.c index e536ae59..194aa793 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1019,26 +1019,18 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Possibly clear pages and check if free space is available -static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, mi_segments_tld_t* tld) +static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size) { mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); bool has_page = false; for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { - mi_assert_internal(!page->is_reset); - mi_assert_internal(page->is_committed); - mi_assert_internal(mi_page_not_in_queue(page, tld)); - mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); - mi_assert_internal(mi_page_heap(page) == NULL); - mi_assert_internal(page->next == NULL); + if (page->segment_in_use) { // ensure used count is up to date and collect potential concurrent frees _mi_page_free_collect(page, false); if (mi_page_all_free(page)) { - // if everything free already, clear the page directly - segment->abandoned--; - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - mi_segment_page_clear(segment, page, false, tld); // no (delayed) reset allowed (as the segment is still abandoned) + // if everything free already, page can be reused for some block size + // note: don't clear yet as we can only reset it once it is reclaimed has_page = true; } else if (page->xblock_size == block_size && page->used < page->reserved) { @@ -1047,6 +1039,7 @@ static bool mi_segment_pages_collect(mi_segment_t* segment, size_t block_size, m } } else { + // whole empty page has_page = true; } } @@ -1081,7 +1074,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, // set the heap again and allow delayed free again mi_page_set_heap(page, heap); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - mi_assert_internal(!mi_page_all_free(page)); // TODO: should we not collect again given that we just collected? 
_mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { @@ -1097,7 +1089,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet - mi_pages_reset_add(segment, page, tld); + // note: no not reset as this includes pages that were not touched before + // mi_pages_reset_add(segment, page, tld); } } mi_assert_internal(segment->abandoned == 0); @@ -1146,7 +1139,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, int max_tries = 8; // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; - bool has_page = mi_segment_pages_collect(segment,block_size,tld); // try to free up pages (due to concurrent frees) + bool has_page = mi_segment_check_free(segment,block_size); // try to free up pages (due to concurrent frees) if (has_page && segment->page_kind == page_kind) { // found a free page of the right kind, or page of the right block_size with free space return mi_segment_reclaim(segment, heap, block_size, tld); From d4927adddc2c3b748934d3e45c4ddb673c6076ee Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 25 Jan 2020 14:30:02 -0800 Subject: [PATCH 176/179] add extra assertion that all segments are free on thread termination --- src/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/init.c b/src/init.c index 366acbf5..f8411187 100644 --- a/src/init.c +++ b/src/init.c @@ -203,6 +203,7 @@ static bool _mi_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { + mi_assert_internal(heap->tld->segments.count == 0); _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); } #if (MI_DEBUG > 0) From e628fc70676e8e2176fe66e8275480c14ad29ca3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 26 Jan 2020 12:39:11 -0800 Subject: [PATCH 177/179] cleanup reclaim logic --- include/mimalloc-internal.h | 24 +++----- src/page.c | 40 +++++------- src/segment.c | 117 +++++++++++++++++++----------------- 3 files changed, 87 insertions(+), 94 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 902d2fdf..c7d7a1da 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -411,30 +411,24 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* return mi_tf_make(block, mi_tf_delayed(tf)); } -// are all blocks in a page freed? +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); return (page->used == 0); } -// are there immediately available blocks +// are there any available blocks? +static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// are there immediately available blocks, i.e. blocks available on the free list. static inline bool mi_page_immediate_available(const mi_page_t* page) { mi_assert_internal(page != NULL); return (page->free != NULL); } -// are there free blocks in this page? 
-static inline bool mi_page_has_free(mi_page_t* page) {
-  mi_assert_internal(page != NULL);
-  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL));
-  mi_assert_internal(hasfree || page->used == page->capacity);
-  return hasfree;
-}
-
-// are all blocks in use?
-static inline bool mi_page_all_used(mi_page_t* page) {
-  mi_assert_internal(page != NULL);
-  return !mi_page_has_free(page);
-}
 
 // is more than 7/8th of a page in use?
 static inline bool mi_page_mostly_used(const mi_page_t* page) {
diff --git a/src/page.c b/src/page.c
index c5b86b08..e552a61e 100644
--- a/src/page.c
+++ b/src/page.c
@@ -234,6 +234,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
   mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
   mi_assert_internal(!page->is_reset);
+  // TODO: push on full queue immediately if it is full?
   mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
   mi_page_queue_push(heap, pq, page);
   mi_assert_expensive(_mi_page_is_valid(page));
@@ -245,28 +246,16 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
   mi_assert_internal(pq==NULL||block_size == pq->block_size);
   mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
   if (page == NULL) {
-    // this may be out-of-memory, or a page was reclaimed
-    if (pq!=NULL && (page = pq->first) != NULL) {
-      mi_assert_expensive(_mi_page_is_valid(page));
-      if (!mi_page_immediate_available(page)) {
-        mi_page_extend_free(heap, page, heap->tld);
-      }
-      mi_assert_internal(mi_page_immediate_available(page));
-      if (mi_page_immediate_available(page)) {
-        return page; // reclaimed page
-      }
-    }
-    return NULL; // out-of-memory
-  }
-  else {
-    // a fresh page was allocated, initialize it
-    mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
-    mi_page_init(heap, page, block_size, heap->tld);
-    _mi_stat_increase(&heap->tld->stats.pages, 1);
-    if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
-    mi_assert_expensive(_mi_page_is_valid(page));
-    return page;
+    // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
+    return NULL;
   }
+  // a fresh page was found, initialize it
+  mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
+  mi_page_init(heap, page, block_size, heap->tld);
+  _mi_stat_increase(&heap->tld->stats.pages, 1);
+  if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
+  mi_assert_expensive(_mi_page_is_valid(page));
+  return page;
 }
 
 // Get a fresh page to use
@@ -648,7 +637,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
 
 -------------------------------------------------------------*/
 
 // Find a page with free blocks of `page->block_size`.
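/* [editor's note] A minimal sketch, not part of the patch: the calling pattern
   that the new `first_try` parameter below enables. A NULL result from the
   fresh-page path is ambiguous (true out-of-memory, or an abandoned page was
   just reclaimed straight into this heap's queue), so the queue search is
   retried exactly once before giving up. All names are hypothetical stubs. */
#include <stdbool.h>
#include <stddef.h>

typedef struct demo_page_s demo_page_t;

static demo_page_t* demo_queue_search(void* heap, void* queue) { (void)heap; (void)queue; return NULL; } // stub
static demo_page_t* demo_fresh_alloc(void* heap, void* queue)  { (void)heap; (void)queue; return NULL; } // stub: NULL is ambiguous

static demo_page_t* demo_find_free(void* heap, void* queue, bool first_try) {
  demo_page_t* page = demo_queue_search(heap, queue);
  if (page == NULL) {
    page = demo_fresh_alloc(heap, queue);
    if (page == NULL && first_try) {
      // a reclaimed page may now be sitting in the queue: search one more time
      page = demo_find_free(heap, queue, false);
    }
  }
  return page;
}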
-static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq)
+static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
 {
   // search through the pages in "next fit" order
   size_t count = 0;
@@ -686,13 +675,16 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   if (page == NULL) {
     _mi_heap_collect_retired(heap, false); // perhaps make a page available
     page = mi_page_fresh(heap, pq);
+    if (page == NULL && first_try) {
+      // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
+      page = mi_page_queue_find_free_ex(heap, pq, false);
+    }
   }
   else {
     mi_assert(pq->first == page);
     page->retire_expire = 0;
   }
   mi_assert_internal(page == NULL || mi_page_immediate_available(page));
-
   return page;
 }
 
@@ -716,7 +708,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
       return page; // fast path
     }
   }
-  return mi_page_queue_find_free_ex(heap, pq);
+  return mi_page_queue_find_free_ex(heap, pq, true);
 }
 
diff --git a/src/segment.c b/src/segment.c
index 194aa793..c7a9662b 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -669,6 +669,11 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
   // set protection
   mi_segment_protect(segment, true, tld->os);
 
+  // insert in free lists for small and medium pages
+  if (page_kind <= MI_PAGE_MEDIUM) {
+    mi_segment_insert_in_free_queue(segment, tld);
+  }
+
   //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment);
   return segment;
 }
@@ -1019,21 +1024,25 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
 ----------------------------------------------------------- */
 
 // Possibly clear pages and check if free space is available
-static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size)
+static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free)
 {
   mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
   bool has_page = false;
+  size_t pages_used = 0;
+  size_t pages_used_empty = 0;
   for (size_t i = 0; i < segment->capacity; i++) {
     mi_page_t* page = &segment->pages[i];
     if (page->segment_in_use) {
+      pages_used++;
       // ensure used count is up to date and collect potential concurrent frees
       _mi_page_free_collect(page, false);
       if (mi_page_all_free(page)) {
         // if everything free already, page can be reused for some block size
-        // note: don't clear yet as we can only reset it once it is reclaimed
+        // note: don't clear the page yet as we can only OS reset it once it is reclaimed
+        pages_used_empty++;
         has_page = true;
       }
-      else if (page->xblock_size == block_size && page->used < page->reserved) {
+      else if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
         // a page has available free blocks of the right size
         has_page = true;
       }
@@ -1043,15 +1052,19 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size)
       has_page = true;
     }
   }
+  mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty);
+  if (all_pages_free != NULL) {
+    *all_pages_free = ((pages_used - pages_used_empty) == 0);
+  }
   return has_page;
 }
 
-#define MI_RECLAIMED ((mi_segment_t*)1)
-// Reclaim a segment
-static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld) {
+// Reclaim a segment; returns NULL if the segment was freed
+// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
+static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->abandoned_next == NULL);
-  bool right_page_reclaimed = false;
+  if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
   segment->thread_id = _mi_thread_id();
   segment->abandoned_visits = 0;
@@ -1071,10 +1084,10 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       segment->abandoned--;
       mi_assert(page->next == NULL);
       _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
-      // set the heap again and allow delayed free again
+      // set the heap again and allow heap thread delayed free again.
       mi_page_set_heap(page, heap);
       _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
-      // TODO: should we not collect again given that we just collected?
+      // TODO: should we not collect again given that we just collected in `check_free`?
       _mi_page_free_collect(page, false); // ensure used count is up to date
       if (mi_page_all_free(page)) {
         // if everything free already, clear the page directly
@@ -1083,77 +1096,67 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
       else {
         // otherwise reclaim it into the heap
         _mi_page_reclaim(heap, page);
-        if (block_size == page->xblock_size) {
-          right_page_reclaimed = true;
+        if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
+          if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
         }
       }
     }
     else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet
-      // note: no not reset as this includes pages that were not touched before
+      // note: do not reset as this includes pages that were not touched before
       // mi_pages_reset_add(segment, page, tld);
     }
   }
   mi_assert_internal(segment->abandoned == 0);
-  if (right_page_reclaimed) {
-    // add the segment's free pages to the free small segment queue
+  if (segment->used == 0) {
+    mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed));
+    mi_segment_free(segment, false, tld);
+    return NULL;
+  }
+  else {
    if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) {
      mi_segment_insert_in_free_queue(segment, tld);
    }
-    // and return reclaimed: at the page allocation the page is already in the queue now
-    return MI_RECLAIMED;
-  }
-  else {
-    // otherwise return the segment as it will contain some free pages
-    // (except for abandoned_reclaim_all which uses a block_size of zero)
-    mi_assert_internal(segment->used < segment->capacity || block_size == 0);
    return segment;
  }
 }
 
-// Reclaim a segment without returning it
-static void mi_segment_reclaim_force(mi_segment_t* segment, mi_heap_t* heap, mi_segments_tld_t* tld) {
-  mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, tld);
-  mi_assert_internal(res != MI_RECLAIMED); // due to block_size == 0
-  if (res!=MI_RECLAIMED && res != NULL) {
-    mi_assert_internal(res == segment);
-    if (res->used == 0) {
-      mi_segment_free(segment, false, tld);
-    }
-    else if (res->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(res)) {
-      mi_segment_insert_in_free_queue(res, tld);
-    }
-  }
-}
 
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
   mi_segment_t* segment;
   while ((segment = mi_abandoned_pop()) != NULL) {
-    mi_segment_reclaim_force(segment, heap, tld);
+    mi_segment_reclaim(segment, heap, 0, NULL, tld);
   }
 }
-
-static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, mi_segments_tld_t* tld)
+static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld)
 {
+  *reclaimed = false;
   mi_segment_t* segment;
   int max_tries = 8; // limit the work to bound allocation times
   while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
     segment->abandoned_visits++;
-    bool has_page = mi_segment_check_free(segment,block_size); // try to free up pages (due to concurrent frees)
+    bool all_pages_free;
+    bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees)
+    if (all_pages_free) {
+      // free the segment (by forced reclaim) to make it available to other threads.
+      // note1: we prefer to free a segment as that might lead to reclaiming another
+      // segment that is still partially used.
+      // note2: we could in principle optimize this by skipping reclaim and directly
+      // freeing but that would violate some invariants temporarily)
+      mi_segment_reclaim(segment, heap, 0, NULL, tld);
     }
-    else if (segment->used==0) {
-      // free the segment to make it available to other threads
-      mi_segment_os_free(segment, segment->segment_size, tld);
+    else if (has_page && segment->page_kind == page_kind) {
+      // found a free page of the right kind, or page of the right block_size with free space
+      // we return the result of reclaim (which is usually `segment`) as it might free
+      // the segment due to concurrent frees (in which case `NULL` is returned).
+      return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
     }
     else if (segment->abandoned_visits >= 3) {
-      // always reclaim on 3rd visit to limit the list length
-      mi_segment_reclaim_force(segment, heap, tld);
+      // always reclaim on 3rd visit to limit the list length.
+      mi_segment_reclaim(segment, heap, 0, NULL, tld);
     }
     else {
-      // push on the visited list so it gets not looked at too quickly again
+      // otherwise, push on the visited list so it gets not looked at too quickly again
      mi_abandoned_visited_push(segment);
     }
   }
@@ -1176,12 +1179,16 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s
     return segment;
   }
   // 2. try to reclaim an abandoned segment
-  segment = mi_segment_try_reclaim(heap, block_size, page_kind, tld);
-  if (segment == MI_RECLAIMED) {
-    return NULL; // pretend out-of-memory as the page will be in the page queue of the heap
+  bool reclaimed;
+  segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld);
+  if (reclaimed) {
+    // reclaimed the right page right into the heap
+    mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE);
+    return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks
  }
   else if (segment != NULL) {
-    return segment; // reclaimed a segment with empty pages in it
+    // reclaimed a segment with empty pages (of `page_kind`) in it
+    return segment;
   }
   // 3. otherwise allocate a fresh segment
   return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld);
@@ -1216,12 +1223,12 @@ static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_p
   // find an available segment the segment free queue
   mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld);
   if (mi_segment_queue_is_empty(free_queue)) {
-    // possibly allocate a fresh segment
-    mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld);
+    // possibly allocate or reclaim a fresh segment
+    mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld);
     if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed)
+    mi_assert_internal(free_queue->first == segment);
     mi_assert_internal(segment->page_kind==kind);
     mi_assert_internal(segment->used < segment->capacity);
-    mi_segment_enqueue(free_queue, segment);
   }
   mi_assert_internal(free_queue->first != NULL);
   mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld);

From 42586de10437308293f5967cc4c6527c0d67a76c Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 27 Jan 2020 23:13:57 -0800
Subject: [PATCH 178/179] fix is_zero setting in regions

---
 src/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/memory.c b/src/memory.c
index 96047b79..55122887 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -284,7 +284,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
   mi_assert_internal(!(info.x.is_large && !*is_large));
   mi_assert_internal(start != NULL);
 
-  *is_zero = mi_bitmap_unclaim(&region->dirty, 1, blocks, bit_idx);
+  *is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
   *is_large = info.x.is_large;
   *memid = mi_memid_create(region, bit_idx);
   void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);

From 9c166d88f0ca6ce5322856e58ac730972ca5404f Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 27 Jan 2020 23:15:24 -0800
Subject: [PATCH 179/179] increase retire page size

---
 src/page.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/page.c b/src/page.c
index 149926e8..28e5dfdb 100644
--- a/src/page.c
+++ b/src/page.c
@@ -386,6 +386,8 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   _mi_segment_page_free(page, force, segments_tld);
 }
 
+#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX)
+
 // Retire a page with no more used blocks
 // Important to not retire too quickly though as new
 // allocations might coming.
@@ -406,7 +408,7 @@ void _mi_page_retire(mi_page_t* page) {
   // how to check this efficiently though...
   // for now, we don't retire if it is the only page left of this size class.
   mi_page_queue_t* pq = mi_page_queue_of(page);
-  if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) {
+  if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) {
    if (pq->last==page && pq->first==page) { // the only page in the queue?
      mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
      page->retire_expire = 16;
@@ -421,7 +423,7 @@ void _mi_page_retire(mi_page_t* page) {
 
 // free retired pages: we don't need to look at the entire queues
 // since we only retire pages that are the last one in a queue.
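/* [editor's note] A small sketch, not part of the patch: the deferred-retire
   scheme the two hunks above tune. An empty page that is the only one of its
   size class is not freed immediately; instead `retire_expire` is set and a
   later collect pass frees the page only if it is still completely free once
   the countdown runs out. Struct and function names here are hypothetical. */
#include <stdbool.h>
#include <stddef.h>

typedef struct demo_retire_page_s {
  size_t used;           // live blocks in the page
  int    retire_expire;  // 0 = not scheduled; otherwise a countdown to freeing
} demo_retire_page_t;

static void demo_on_page_empty(demo_retire_page_t* page, bool only_page_in_queue) {
  if (only_page_in_queue) {
    page->retire_expire = 16;   // keep it around: the next allocation will likely reuse it
  }
  else {
    // free the page right away (not shown)
  }
}

// called periodically; returns true when the page should really be freed now
static bool demo_collect_retired(demo_retire_page_t* page) {
  if (page->retire_expire == 0) return false;                       // nothing scheduled
  if (page->used != 0) { page->retire_expire = 0; return false; }   // reused in the meantime, cancel
  return (--page->retire_expire == 0);                              // free once the countdown elapses
}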
 void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
-  for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) {
+  for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_MAX_RETIRE_SIZE; pq++) {
     mi_page_t* page = pq->first;
     if (page != NULL && page->retire_expire != 0) {
       if (mi_page_all_free(page)) {