initial experiment with fixed memory arena and sliced segments

This commit is contained in:
daan 2019-10-31 00:40:41 -07:00
parent c7ec30ae25
commit f7d2c45af3
13 changed files with 39 additions and 564 deletions

View File

@ -18,6 +18,7 @@ set(mi_install_dir "lib/mimalloc-${mi_version}")
set(mi_sources set(mi_sources
src/stats.c src/stats.c
src/os.c src/os.c
src/arena.c
src/segment.c src/segment.c
src/page.c src/page.c
src/alloc.c src/alloc.c

View File

@ -231,6 +231,7 @@
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" /> <ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" /> <ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\heap.c" /> <ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" /> <ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\options.c" /> <ClCompile Include="..\..\src\options.c" />

View File

@ -67,5 +67,8 @@
<ClCompile Include="..\..\src\alloc-posix.c"> <ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -217,6 +217,7 @@
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" /> <ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" /> <ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\heap.c" /> <ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" /> <ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\options.c" /> <ClCompile Include="..\..\src\options.c" />

View File

@ -50,6 +50,9 @@
<ClCompile Include="..\..\src\alloc-posix.c"> <ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h"> <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -231,6 +231,7 @@
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" /> <ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" /> <ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\heap.c" /> <ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" /> <ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\options.c" /> <ClCompile Include="..\..\src\options.c" />

View File

@ -217,6 +217,7 @@
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" /> <ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" /> <ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\heap.c" /> <ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" /> <ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\options.c" /> <ClCompile Include="..\..\src\options.c" />

View File

@ -65,6 +65,12 @@ bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
// arena.c
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
/* /*
// memory.c // memory.c

View File

@ -239,12 +239,14 @@ typedef mi_page_t mi_slice_t;
// the OS. Inside segments we allocated fixed size _pages_ that // the OS. Inside segments we allocated fixed size _pages_ that
// contain blocks. // contain blocks.
typedef struct mi_segment_s { typedef struct mi_segment_s {
struct mi_segment_s* next; // the list of freed segments in the cache size_t memid; // memory id for arena allocation
volatile _Atomic(struct mi_segment_s*) abandoned_next;
bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
bool mem_is_committed; // `true` if the whole segment is eagerly committed bool mem_is_committed; // `true` if the whole segment is eagerly committed
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache
volatile _Atomic(struct mi_segment_s*) abandoned_next;
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t used; // count of pages in use size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`

View File

@ -1,551 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache etc), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads and
need to be accessed using atomic operations.
We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses in that
object :-( (`malloc-large` tests this). This means we need a cheaper way to
reuse memory.
3. This layer can help with a NUMA aware allocation in the future.
Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
simpler. Generally though that requires shrinking of huge pages, and may overuse
memory per thread. (and is not compatible with `sbrk`).
- Since the current regions are per-process, we need atomic operations to
claim blocks which may be contended
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
linearly. At what point will direct OS calls be faster? Is there a way to
do this better without adding too much complexity?
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
// Internal raw OS interface
size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size);
bool _mi_os_unprotect(void* addr, size_t size);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
bool _mi_os_is_huge_reserved(void* p);
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8)
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
#define MI_REGION_MAP_FULL UINTPTR_MAX
typedef uintptr_t mi_region_info_t;
static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0));
}
static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) {
if (is_large) *is_large = ((info&0x02) != 0);
if (is_committed) *is_committed = ((info&0x01) != 0);
return (void*)(info & ~0x03);
}
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block
volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags
volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd
} mem_region_t;
// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// TODO: in the future, maintain a map per NUMA node for numa aware allocation
static mem_region_t regions[MI_REGION_MAX];
static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions
/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/
// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
}
// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
return ((((uintptr_t)1 << blocks) - 1) << bitidx);
}
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
return _mi_align_up(size, _mi_os_large_page_size());
}
// Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
if (p==NULL) return false;
size_t count = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(&regions[i].info), NULL, NULL);
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
}
return false;
}
/* ----------------------------------------------------------------------------
Commit from a region
-----------------------------------------------------------------------------*/
// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks,
size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
size_t mask = mi_region_block_mask(blocks,bitidx);
mi_assert_internal(mask != 0);
mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
mi_assert_internal(&regions[idx] == region);
// ensure the region is reserved
mi_region_info_t info = mi_atomic_read(&region->info);
if (info == 0)
{
bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
bool region_large = *allow_large;
void* start = NULL;
if (region_large) {
start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN);
if (start != NULL) { region_commit = true; }
}
if (start == NULL) {
start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
}
mi_assert_internal(!(region_large && !*allow_large));
if (start == NULL) {
// failure to allocate from the OS! unclaim the blocks and fail
size_t map;
do {
map = mi_atomic_read_relaxed(&region->map);
} while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
return false;
}
// set the newly allocated region
info = mi_region_info_create(start,region_large,region_commit);
if (mi_atomic_cas_strong(&region->info, info, 0)) {
// update the region count
mi_atomic_increment(&regions_count);
}
else {
// failed, another thread allocated just before us!
// we assign it to a later slot instead (up to 4 tries).
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
mi_atomic_increment(&regions_count);
start = NULL;
break;
}
}
if (start != NULL) {
// free it if we didn't succeed to save it to some other region
_mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
}
// and continue with the memory at our index
info = mi_atomic_read(&region->info);
}
}
mi_assert_internal(info == mi_atomic_read(&region->info));
mi_assert_internal(info != 0);
// Commit the blocks to memory
bool region_is_committed = false;
bool region_is_large = false;
void* start = mi_region_info_read(info,&region_is_large,&region_is_committed);
mi_assert_internal(!(region_is_large && !*allow_large));
mi_assert_internal(start!=NULL);
// set dirty bits
uintptr_t m;
do {
m = mi_atomic_read(&region->dirty_mask);
} while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m));
*is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range?
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
if (*commit && !region_is_committed) {
// ensure commit
bool commit_zero = false;
_mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages)
if (commit_zero) *is_zero = true;
}
else if (!*commit && region_is_committed) {
// but even when no commit is requested, we might have committed anyway (in a huge OS page for example)
*commit = true;
}
// and return the allocation
mi_assert_internal(blocks_start != NULL);
*allow_large = region_is_large;
*p = blocks_start;
*id = (idx*MI_REGION_MAP_BITS) + bitidx;
return true;
}
// Use bit scan forward to quickly find the first zero bit if it is available
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanForward64(&idx, x);
#else
_BitScanForward(&idx, x);
#endif
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanReverse64(&idx, x);
#else
_BitScanReverse(&idx, x);
#endif
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#define MI_HAVE_BITSCAN
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x));
}
#endif
// Allocate `blocks` in a `region` at `idx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
mi_assert_internal(p != NULL && id != NULL);
mi_assert_internal(blocks < MI_REGION_MAP_BITS);
const uintptr_t mask = mi_region_block_mask(blocks, 0);
const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
uintptr_t map = mi_atomic_read(&region->map);
if (map==MI_REGION_MAP_FULL) return true;
#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while(bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(&region->map, newmap, map)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
map = mi_atomic_read(&region->map);
continue;
}
else {
// success, we claimed the bits
// now commit the block memory -- this can still fail
return mi_region_commit_blocks(region, idx, bitidx, blocks,
size, commit, allow_large, is_zero, p, id, tld);
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= blocks);
#else
size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no error, but also no bits found
return true;
}
// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero,
void** p, size_t* id, mi_os_tld_t* tld)
{
// check if there are available blocks in the region..
mi_assert_internal(idx < MI_REGION_MAX);
mem_region_t* region = &regions[idx];
uintptr_t m = mi_atomic_read_relaxed(&region->map);
if (m != MI_REGION_MAP_FULL) { // some bits are zero
bool ok = (*commit || *allow_large); // committing or allow-large is always ok
if (!ok) {
// otherwise skip incompatible regions if possible.
// this is not guaranteed due to multiple threads allocating at the same time but
// that's ok. In secure mode, large is never allowed for any thread, so that works out;
// otherwise we might just not be able to reset/decommit individual pages sometimes.
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
bool is_large;
bool is_committed;
void* start = mi_region_info_read(info,&is_large,&is_committed);
ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation?
}
if (ok) {
return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld);
}
}
return true; // no error, but no success either
}
/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero,
size_t* id, mi_os_tld_t* tld)
{
mi_assert_internal(id != NULL && tld != NULL);
mi_assert_internal(size > 0);
*id = SIZE_MAX;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
// use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
*is_zero = true;
return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size
}
// always round size to OS page size multiple (so commit/decommit go over the entire range)
// TODO: use large OS page size here?
size = _mi_align_up(size, _mi_os_page_size());
// calculate the number of needed blocks
size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
// find a range of free blocks
void* p = NULL;
size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
if (p == NULL) {
// no free range in existing regions -- try to extend beyond the count.. but at most 8 regions
for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) {
if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
}
if (p == NULL) {
// we could not find a place to allocate, fall back to the os directly
_mi_warning_message("unable to allocate from region: size %zu\n", size);
*is_zero = true;
p = _mi_os_alloc_aligned(size, alignment, commit, large, tld);
}
else {
tld->region_idx = idx; // next start of search? currently not used as we use first-fit
}
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
return p;
}
/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
mi_assert_internal(size > 0 && stats != NULL);
if (p==NULL) return;
if (size==0) return;
if (id == SIZE_MAX) {
// was a direct OS allocation, pass through
_mi_os_free(p, size, stats);
}
else {
// allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
// we can align the size up to page size (as we allocate that way too)
// this ensures we fully commit/decommit/reset
size = _mi_align_up(size, _mi_os_page_size());
size_t idx = (id / MI_REGION_MAP_BITS);
size_t bitidx = (id % MI_REGION_MAP_BITS);
size_t blocks = mi_region_block_count(size);
size_t mask = mi_region_block_mask(blocks, bitidx);
mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
mem_region_t* region = &regions[idx];
mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
mi_region_info_t info = mi_atomic_read(&region->info);
bool is_large;
bool is_eager_committed;
void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
mi_assert_internal(start != NULL);
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`?
// decommit (or reset) the blocks to reduce the working set.
// TODO: implement delayed decommit/reset as these calls are too expensive
// if the memory is reused soon.
// reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large
if (!is_large) {
if (mi_option_is_enabled(mi_option_segment_reset)) {
if (!is_eager_committed && // cannot reset large pages
(mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead
mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments
{
_mi_os_reset(p, size, stats);
//_mi_os_decommit(p, size, stats); // todo: and clear dirty bits?
}
}
<<<<<<< HEAD
// else { _mi_os_reset(p,size,stats); }
}
=======
}
>>>>>>> dev
if (!is_eager_committed) {
// adjust commit statistics as we commit again when re-using the same slot
_mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
}
// TODO: should we free empty regions? currently only done _mi_mem_collect.
// this frees up virtual address space which might be useful on 32-bit systems?
// and unclaim
uintptr_t map;
uintptr_t newmap;
do {
map = mi_atomic_read_relaxed(&region->map);
newmap = map & ~mask;
} while (!mi_atomic_cas_weak(&region->map, newmap, map));
}
}
/* ----------------------------------------------------------------------------
collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_stats_t* stats) {
// free every region that has no segments in use.
for (size_t i = 0; i < regions_count; i++) {
mem_region_t* region = &regions[i];
if (mi_atomic_read_relaxed(&region->map) == 0) {
// if no segments used, try to claim the whole region
uintptr_t m;
do {
m = mi_atomic_read_relaxed(&region->map);
} while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
if (m == 0) {
// on success, free the whole region (unless it was huge reserved)
bool is_eager_committed;
void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
if (start != NULL && !_mi_os_is_huge_reserved(start)) {
_mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats);
}
// and release
mi_atomic_write(&region->info,0);
mi_atomic_write(&region->map,0);
}
}
}
}
/* ----------------------------------------------------------------------------
Other
-----------------------------------------------------------------------------*/
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
return _mi_os_commit(p, size, is_zero, stats);
}
bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) {
return _mi_os_decommit(p, size, stats);
}
bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) {
return _mi_os_reset(p, size, stats);
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) {
return _mi_os_unreset(p, size, is_zero, stats);
}
bool _mi_mem_protect(void* p, size_t size) {
return _mi_os_protect(p, size);
}
bool _mi_mem_unprotect(void* p, size_t size) {
return _mi_os_unprotect(p, size);
}

View File

@ -868,13 +868,13 @@ static void mi_os_free_huge_reserved() {
*/ */
#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP))) #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
UNUSED(pages); UNUSED(max_secs); UNUSED(pages); UNUSED(max_secs);
if (pages_reserved != NULL) *pages_reserved = 0; if (pages_reserved != NULL) *pages_reserved = 0;
return ENOMEM; return ENOMEM;
} }
#else #else
int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
{ {
if (pages_reserved != NULL) *pages_reserved = 0; if (pages_reserved != NULL) *pages_reserved = 0;
if (max_secs==0) return ETIMEDOUT; // timeout if (max_secs==0) return ETIMEDOUT; // timeout

View File

@ -284,7 +284,8 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
if (MI_SECURE>0) { if (MI_SECURE>0) {
_mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set _mi_os_unprotect(segment, mi_segment_size(segment)); // ensure no more guard pages are set
} }
_mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
_mi_arena_free(segment, mi_segment_size(segment), segment->memid, tld->stats);
} }
@ -598,29 +599,35 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
// Try to get from our cache first // Try to get from our cache first
mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld); mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld);
bool is_zero = false;
if (segment==NULL) { if (segment==NULL) {
// Allocate the segment from the OS // Allocate the segment from the OS
bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld); size_t memid = 0;
// segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld);
segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld);
if (segment == NULL) return NULL; // failed to allocate if (segment == NULL) return NULL; // failed to allocate
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
if (!commit) { if (!commit) {
// at least commit the info slices // at least commit the info slices
mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(MI_COMMIT_SIZE > info_slices*MI_SEGMENT_SLICE_SIZE);
bool is_zero = false;
_mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats); _mi_os_commit(segment, MI_COMMIT_SIZE, &is_zero, tld->stats);
} }
segment->memid = memid;
segment->mem_is_fixed = mem_large; segment->mem_is_fixed = mem_large;
segment->mem_is_committed = commit; segment->mem_is_committed = commit;
mi_segments_track_size((long)(segment_size), tld); mi_segments_track_size((long)(segment_size), tld);
mi_segment_map_allocated_at(segment); mi_segment_map_allocated_at(segment);
} }
// zero the segment info? -- not needed as it is zero initialized from the OS // zero the segment info? -- not always needed as it is zero initialized from the OS
// memset(segment, 0, info_size); if (!is_zero) {
ptrdiff_t ofs = offsetof(mi_segment_t, next);
size_t prefix = offsetof(mi_segment_t, slices) - ofs;
memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices);
}
// initialize segment info // initialize segment info
memset(segment,0,offsetof(mi_segment_t,slices));
segment->segment_slices = segment_slices; segment->segment_slices = segment_slices;
segment->segment_info_slices = info_slices; segment->segment_info_slices = info_slices;
segment->thread_id = _mi_thread_id(); segment->thread_id = _mi_thread_id();
@ -629,7 +636,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE);
segment->allow_decommit = !commit; segment->allow_decommit = !commit;
segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed segment->commit_mask = (!commit ? 0x01 : ~((uintptr_t)0)); // on lazy commit, the initial part is always committed
memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1));
_mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment));
// set up guard pages // set up guard pages

View File

@ -18,7 +18,7 @@ terms of the MIT license.
// argument defaults // argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors static int THREADS = 32; // more repeatable if THREADS <= #processors
static int N = 20; // scaling factor static int N = 40; // scaling factor
// static int THREADS = 8; // more repeatable if THREADS <= #processors // static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int N = 100; // scaling factor // static int N = 100; // scaling factor