/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache etc), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads
and need to be accessed using atomic operations.

We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
   to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
   an OS allocation/free is still (much) too expensive relative to the accesses
   in that object :-( (`malloc-large` tests this). This means we need a cheaper
   way to reuse memory.
3. This layer allows for NUMA aware allocation.

Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
  simpler. Generally though that requires shrinking of huge pages, and may overuse
  memory per thread. (and is not compatible with `sbrk`).
- Since the current regions are per-process, we need atomic operations to claim
  blocks which may be contended.
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
  linearly. At what point will direct OS calls be faster? Is there a way to do
  this better without adding too much complexity?
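For scale (a rough worked example using the defaults below: 4MiB `MI_SEGMENT_SIZE`
blocks and 64-bit bitmap fields): one region covers 64 * 4MiB = 256MiB, so the
256GiB maximum heap space corresponds to 256GiB / 256MiB = 1024 region entries
that such a worst-case linear search has to visit.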
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include <string.h>  // memset

#include "bitmap.inc.c"

// Internal raw OS interface
size_t  _mi_os_large_page_size();
bool    _mi_os_protect(void* addr, size_t size);
bool    _mi_os_unprotect(void* addr, size_t size);
bool    _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool    _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool    _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);

// arena.c
void    _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
void*   _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void*   _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);

// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE    (256 * GiB)  // 48KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE    (3 * GiB)    // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif

#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE

#define MI_REGION_MAX_BLOCKS      MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS  (MI_REGION_MAX_BLOCKS/4)                    // 64MiB
#define MI_REGION_MAX_OBJ_SIZE    (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)

// Region info packs the region flags: whether it is initialized (`valid`),
// whether it uses large/huge OS pages (`is_large`), and its associated numa node.
typedef union mi_region_info_u {
  uintptr_t value;
  struct {
    bool  valid;
    bool  is_large;
    short numa_node;
  };
} mi_region_info_t;


// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
  volatile _Atomic(uintptr_t) info;        // is_large, and associated numa node + 1 (so 0 is no association)
  volatile _Atomic(void*)     start;       // start of the memory area (and flags)
  mi_bitmap_field_t           in_use;      // bit per in-use block
  mi_bitmap_field_t           dirty;       // track if non-zero per block
  mi_bitmap_field_t           commit;      // track if committed per block (if `!info.is_committed`)
  mi_bitmap_field_t           reset;       // track reset per block
  volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
} mem_region_t;

// The region map
static mem_region_t regions[MI_REGION_MAX];

// Allocated regions
static volatile _Atomic(uintptr_t) regions_count; // = 0;


/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/

// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
  return _mi_divide_up(size, MI_SEGMENT_SIZE);
}

/*
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
  return _mi_align_up(size, _mi_os_large_page_size());
}
*/
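/*
// Illustrative only (not used by the allocator): with the default 4MiB
// MI_SEGMENT_SIZE, sizes are rounded up to whole blocks, so for example a
// 9MiB request needs mi_region_block_count(9*MiB) == _mi_divide_up(9*MiB, 4*MiB) == 3
// blocks, and the largest size served from a region is
// MI_REGION_MAX_OBJ_SIZE == (MI_BITMAP_FIELD_BITS/4) * MI_SEGMENT_SIZE  (64MiB on 64-bit).
*/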
// Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
  if (p==NULL) return false;
  size_t count = mi_atomic_read_relaxed(&regions_count);
  for (size_t i = 0; i < count; i++) {
    uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
    if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
  }
  return false;
}


static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
  void* start = mi_atomic_read_ptr(&region->start);
  mi_assert_internal(start != NULL);
  return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE));
}

static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
  mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
  size_t idx = region - regions;
  mi_assert_internal(&regions[idx] == region);
  return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}

static size_t mi_memid_create_from_arena(size_t arena_memid) {
  return (arena_memid << 1) | 1;
}


static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
  if ((id&1)==1) {
    if (arena_memid != NULL) *arena_memid = (id>>1);
    return true;
  }
  else {
    size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
    *bit_idx   = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
    *region    = &regions[idx];
    return false;
  }
}
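/*
// Illustrative only: a region memory id packs (region index, block index),
// shifted left by one, with the low bit reserved to tag direct arena
// allocations. For example, block 5 of region 2 (with 64-bit bitmap fields)
// gets id = (2*64 + 5) << 1 == 266, and mi_memid_is_arena(266, &region, &bit_idx, NULL)
// recovers region 2 and bit index 5, returning false (not an arena id).
*/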
/* ----------------------------------------------------------------------------
  Allocate a region from the OS (or an arena)
-----------------------------------------------------------------------------*/

static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
  // not out of regions yet?
  if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;

  // try to allocate a fresh region from the OS
  bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
  bool region_large = (commit && allow_large);
  bool is_zero = false;
  size_t arena_memid = 0;
  void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
  if (start == NULL) return false;
  mi_assert_internal(!(region_large && !allow_large));
  mi_assert_internal(!region_large || region_commit);

  // claim a fresh slot
  const uintptr_t idx = mi_atomic_increment(&regions_count);
  if (idx >= MI_REGION_MAX) {
    mi_atomic_decrement(&regions_count);
    _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
    return false;
  }

  // allocated, initialize and claim the initial blocks
  mem_region_t* r = &regions[idx];
  r->arena_memid = arena_memid;
  mi_atomic_write(&r->in_use, 0);
  mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
  mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
  mi_atomic_write(&r->reset, 0);
  *bit_idx = 0;
  mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
  mi_atomic_write_ptr(&r->start, start);

  // and share it
  mi_region_info_t info;
  info.value = 0;                        // initialize the full union (including padding) to zero
  info.valid = true;
  info.is_large = region_large;
  info.numa_node = _mi_os_numa_node(tld);
  mi_atomic_write(&r->info, info.value); // now make it available to others
  *region = r;
  return true;
}


/* ----------------------------------------------------------------------------
  Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/

static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large) {
  // initialized at all?
  mi_region_info_t info;
  info.value = mi_atomic_read_relaxed(&region->info);
  if (info.value==0) return false;

  // numa correct
  if (numa_node >= 0) {  // use negative numa node to always succeed
    int rnode = info.numa_node;
    if (rnode >= 0 && rnode != numa_node) return false;
  }

  // check allow-large
  if (!allow_large && info.is_large) return false;

  return true;
}


static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
  // try all regions for a free slot
  const size_t count = mi_atomic_read(&regions_count);
  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
  for (size_t visited = 0; visited < count; visited++, idx++) {
    if (idx >= count) idx = 0;  // wrap around
    mem_region_t* r = &regions[idx];
    if (mi_region_is_suitable(r, numa_node, allow_large)) {
      if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
        tld->region_idx = idx;  // remember the last found position
        *region = r;
        return true;
      }
    }
  }
  return false;
}


static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
  mem_region_t* region;
  mi_bitmap_index_t bit_idx;
  const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
  // try to claim in existing regions
  if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
    // otherwise try to allocate a fresh region
    if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
      // out of regions or memory
      return NULL;
    }
  }

  // found a region and claimed `blocks` at `bit_idx`
  mi_assert_internal(region != NULL);
  mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));

  mi_region_info_t info;
  info.value = mi_atomic_read(&region->info);
  void* start = mi_atomic_read_ptr(&region->start);
  mi_assert_internal(!(info.is_large && !*is_large));
  mi_assert_internal(start != NULL);

  *is_zero  = mi_bitmap_unclaim(&region->dirty, 1, blocks, bit_idx);
  *is_large = info.is_large;
  *memid    = mi_memid_create(region, bit_idx);
  void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);

  // commit
  if (*commit) {
    // ensure commit
    bool any_uncommitted;
    mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
    if (any_uncommitted) {
      mi_assert_internal(!info.is_large);
      bool commit_zero;
      _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld);
      if (commit_zero) *is_zero = true;
    }
  }
  else {
    // no need to commit, but check if already fully committed
    *commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
  }
  mi_assert_internal(!*commit || mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));

  // unreset reset blocks
  if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
    // some blocks are still reset
    mi_assert_internal(!info.is_large);
    mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit);
    mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
    if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
      bool reset_zero = false;
      _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
      if (reset_zero) *is_zero = true;
    }
  }
  mi_assert_internal(!mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));

#if (MI_DEBUG>=2)
  if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif

  // and return the allocation
  mi_assert_internal(p != NULL);
  return p;
}
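/*
// Illustrative only: the claim path above is a round-robin search. With three
// allocated regions and `tld->region_idx == 2`, `mi_region_try_claim` visits
// regions 2, 0, 1 (wrapping around); the first suitable region whose `in_use`
// bitmap still has `blocks` consecutive free bits gets them claimed, and its
// index is remembered in `tld->region_idx` for the next search.
*/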
/* ----------------------------------------------------------------------------
 Allocation
-----------------------------------------------------------------------------*/

// Allocate `size` memory aligned at `alignment`. Return non-NULL on success, with a given memory `id`.
// (`id` is abstract, but for region allocations it encodes the region and block index, see `mi_memid_create`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(memid != NULL && tld != NULL);
  mi_assert_internal(size > 0);
  *memid = 0;
  *is_zero = false;
  bool default_large = false;
  if (large==NULL) large = &default_large;  // ensure `large != NULL`
  if (size == 0) return NULL;
  size = _mi_align_up(size, _mi_os_page_size());

  // allocate from regions if possible
  size_t arena_memid;
  const size_t blocks = mi_region_block_count(size);
  if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
    void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
    mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
    if (p != NULL) {
#if (MI_DEBUG>=2)
      if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
      return p;
    }
    _mi_warning_message("unable to allocate from region: size %zu\n", size);
  }

  // and otherwise fall back to the OS
  void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
  *memid = mi_memid_create_from_arena(arena_memid);
  mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
  if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
  return p;
}
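/*
// Illustrative usage sketch only (assumes a valid `mi_os_tld_t* tld`; the local
// variable names are hypothetical):
//   bool   commit  = true;
//   bool   large   = false;
//   bool   is_zero = false;
//   size_t memid   = 0;
//   void*  p = _mi_mem_alloc_aligned(MI_SEGMENT_SIZE, MI_SEGMENT_ALIGN,
//                                    &commit, &large, &is_zero, &memid, tld);
//   if (p != NULL) {
//     // ... use the memory ...
//     _mi_mem_free(p, MI_SEGMENT_SIZE, memid, commit, false, tld);
//   }
*/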
/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/

// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
  mi_assert_internal(size > 0 && tld != NULL);
  if (p==NULL) return;
  if (size==0) return;
  size = _mi_align_up(size, _mi_os_page_size());

  size_t arena_memid = 0;
  mi_bitmap_index_t bit_idx;
  mem_region_t* region;
  if (mi_memid_is_arena(id, &region, &bit_idx, &arena_memid)) {
    // was a direct arena allocation, pass through
    _mi_arena_free(p, size, arena_memid, tld->stats);
  }
  else {
    // allocated in a region
    mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
    const size_t blocks = mi_region_block_count(size);
    mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
    mi_region_info_t info;
    info.value = mi_atomic_read(&region->info);
    mi_assert_internal(info.value != 0);
    void* blocks_start = mi_region_blocks_start(region, bit_idx);
    mi_assert_internal(blocks_start == p); // not a pointer in our area?
    mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
    if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?

    // committed?
    if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
      mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
    }

    if (any_reset) {
      // set the is_reset bits if any pages were reset
      mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
    }

    // reset the blocks to reduce the working set.
    if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset)
       && (mi_option_is_enabled(mi_option_eager_commit) ||
           mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
    {
      bool any_unreset;
      mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
      if (any_unreset) {
        _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
      }
    }

    // and unclaim
    bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
    mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed);
  }
}


/* ----------------------------------------------------------------------------
  collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
  // free every region that has no segments in use.
  uintptr_t rcount = mi_atomic_read_relaxed(&regions_count);
  for (size_t i = 0; i < rcount; i++) {
    mem_region_t* region = &regions[i];
    if (mi_atomic_read_relaxed(&region->info) != 0) {
      // if no segments used, try to claim the whole region
      uintptr_t m;
      do {
        m = mi_atomic_read_relaxed(&region->in_use);
      } while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
      if (m == 0) {
        // on success, free the whole region
        void* start = mi_atomic_read_ptr(&regions[i].start);
        size_t arena_memid = mi_atomic_read_relaxed(&regions[i].arena_memid);
        memset(&regions[i], 0, sizeof(mem_region_t));
        // and release the whole region
        mi_atomic_write(&region->info, 0);
        if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
          _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
        }
      }
    }
  }
}


/* ----------------------------------------------------------------------------
  Other
-----------------------------------------------------------------------------*/

bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
  return _mi_os_reset(p, size, tld->stats);
}

bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
  return _mi_os_unreset(p, size, is_zero, tld->stats);
}

bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
  return _mi_os_commit(p, size, is_zero, tld->stats);
}

bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
  return _mi_os_decommit(p, size, tld->stats);
}

bool _mi_mem_protect(void* p, size_t size) {
  return _mi_os_protect(p, size);
}

bool _mi_mem_unprotect(void* p, size_t size) {
  return _mi_os_unprotect(p, size);
}
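/*
// Illustrative note only: `_mi_mem_collect` above frees a region only when it
// can claim the entire region in one step, i.e. when the compare-and-swap of
// its `in_use` bitmap from 0 (no blocks in use) to MI_BITMAP_FIELD_FULL
// succeeds; regions with any live block are left untouched.
*/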