merge from dev-exp; better abandoned reclamation

daan 2020-01-27 22:12:23 -08:00
commit b50bec463d
25 changed files with 661 additions and 337 deletions

View File

@ -21,6 +21,10 @@ set(mi_sources
src/random.c
src/os.c
src/arena.c
<<<<<<< HEAD
=======
src/region.c
>>>>>>> dev-exp
src/segment.c
src/page.c
src/alloc.c
@ -106,10 +110,9 @@ endif()
# Compiler flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
list(APPEND mi_cflags -Wno-invalid-memory-model)
list(APPEND mi_cflags -fvisibility=hidden)
endif()
endif()

View File

@ -112,7 +112,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<ExceptionHandling>false</ExceptionHandling>
<ExceptionHandling>Sync</ExceptionHandling>
<CompileAs>Default</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
</ClCompile>

View File

@ -250,4 +250,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -74,4 +74,4 @@
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>
</Project>

View File

@ -111,7 +111,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
@ -165,7 +165,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level4</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<ConformanceMode>true</ConformanceMode>
@ -244,4 +244,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -80,4 +80,4 @@
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>
</Project>

View File

@ -90,7 +90,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<ExceptionHandling>false</ExceptionHandling>
<ExceptionHandling>Sync</ExceptionHandling>
<CompileAs>Default</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
</ClCompile>
@ -112,7 +112,7 @@
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\..\include</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<ExceptionHandling>false</ExceptionHandling>
<ExceptionHandling>Sync</ExceptionHandling>
<CompileAs>Default</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
</ClCompile>

View File

@ -75,4 +75,4 @@
<UniqueIdentifier>{39cb7e38-69d0-43fb-8406-6a0f7cefc3b4}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>

View File

@ -100,7 +100,7 @@
<PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp17</LanguageStandard>
<LanguageStandard>Default</LanguageStandard>
</ClCompile>
<Lib>
<AdditionalLibraryDirectories>
@ -119,7 +119,7 @@
<PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp17</LanguageStandard>
<LanguageStandard>Default</LanguageStandard>
</ClCompile>
<PostBuildEvent>
<Command>

View File

@ -78,4 +78,4 @@
<UniqueIdentifier>{852a14ae-6dde-4e95-8077-ca705e97e5af}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>

View File

@ -20,13 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_trace_message(...)
#endif
#define MI_CACHE_LINE 64
#if defined(_MSC_VER)
#pragma warning(disable:4127) // constant conditional due to MI_SECURE paths
#define mi_decl_noinline __declspec(noinline)
#elif defined(__GNUC__) || defined(__clang__)
#define mi_decl_noinline __attribute__((noinline))
#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
#else
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#endif
@ -72,13 +79,15 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed,
// "segment.c"
mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void _mi_abandoned_await_readers(void);
// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
@ -421,30 +430,24 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t*
return mi_tf_make(block, mi_tf_delayed(tf));
}
// are all blocks in a page freed?
// are all blocks in a page freed?
// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
static inline bool mi_page_all_free(const mi_page_t* page) {
mi_assert_internal(page != NULL);
return (page->used == 0);
}
// are there immediately available blocks
// are there any available blocks?
static inline bool mi_page_has_any_available(const mi_page_t* page) {
mi_assert_internal(page != NULL && page->reserved > 0);
return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
}
// are there immediately available blocks, i.e. blocks available on the free list.
static inline bool mi_page_immediate_available(const mi_page_t* page) {
mi_assert_internal(page != NULL);
return (page->free != NULL);
}
// are there free blocks in this page?
static inline bool mi_page_has_free(mi_page_t* page) {
mi_assert_internal(page != NULL);
bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL));
mi_assert_internal(hasfree || page->used == page->capacity);
return hasfree;
}
// are all blocks in use?
static inline bool mi_page_all_used(mi_page_t* page) {
mi_assert_internal(page != NULL);
return !mi_page_has_free(page);
}
// is more than 7/8th of a page in use?
static inline bool mi_page_mostly_used(const mi_page_t* page) {

View File

@ -275,6 +275,7 @@ typedef struct mi_segment_s {
struct mi_segment_s* abandoned_next;
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`

View File

@ -38,14 +38,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_allocator __declspec(restrict)
#endif
#define mi_cdecl __cdecl
#define mi_decl_thread __declspec(thread)
#define mi_attr_malloc
#define mi_attr_alloc_size(s)
#define mi_attr_alloc_size2(s1,s2)
#define mi_attr_alloc_align(p)
#elif defined(__GNUC__) || defined(__clang__)
#elif defined(__GNUC__) // includes clang and icc
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
#define mi_decl_thread __thread
#define mi_decl_export __attribute__((visibility("default")))
#define mi_decl_allocator
#define mi_attr_malloc __attribute__((malloc))
@ -64,7 +62,6 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#else
#define mi_cdecl
#define mi_decl_thread __thread
#define mi_decl_export
#define mi_decl_allocator
#define mi_attr_malloc

View File

@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
mi_block_t* block = page->free;
if (mi_unlikely(block == NULL)) {
@ -291,7 +291,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
}
static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
mi_page_t* page = _mi_segment_page_of(segment, p);
mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
_mi_free_block(page, local, block);
}
@ -339,7 +340,7 @@ void mi_free(void* p) mi_attr_noexcept
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
mi_block_t* const block = (mi_block_t*)p;
if (mi_unlikely(mi_check_is_double_free(page,block))) return;
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
@ -350,7 +351,8 @@ void mi_free(void* p) mi_attr_noexcept
}
else {
// non-local, aligned blocks, or a full page; use the more generic path
mi_free_generic(segment, page, tid == segment->thread_id, p);
// note: recalc page in generic to improve code generation
mi_free_generic(segment, tid == segment->thread_id, p);
}
}

View File

@ -77,8 +77,8 @@ typedef struct mi_arena_s {
// The available arenas
static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static _Atomic(uintptr_t) mi_arena_count; // = 0
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0
/* -----------------------------------------------------------
@ -114,6 +114,7 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t*
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
for (size_t visited = 0; visited < fcount; visited++, idx++) {
if (idx >= fcount) idx = 0; // wrap around
// try to atomically claim a range of bits
if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
mi_atomic_write(&arena->search_idx, idx); // start search from here next time
return true;
@ -213,6 +214,7 @@ static void mi_cache_purge(mi_os_tld_t* tld) {
if (mi_atomic_cas_ptr_weak(mi_cache_slot_t, &slot->p, MI_SLOT_IN_USE, p)) {
// claimed! test again
if (slot->is_committed && !slot->is_large && now >= slot->expire) {
_mi_abandoned_await_readers(); // wait until safe to decommit
_mi_os_decommit(p, MI_SEGMENT_SIZE, tld->stats);
slot->is_committed = false;
}
@ -251,6 +253,7 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
if (is_committed) {
long delay = mi_option_get(mi_option_arena_reset_delay);
if (delay == 0 && !is_large) {
_mi_abandoned_await_readers(); // wait until safe to decommit
_mi_os_decommit(start, size, tld->stats);
slot->is_committed = false;
}
@ -286,8 +289,8 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
// always committed
*commit = true;
}
else if (commit) {
// ensure commit now
else if (*commit) {
// arena not committed as a whole, but commit requested: ensure commit now
bool any_uncommitted;
mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
if (any_uncommitted) {
@ -379,6 +382,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
if (memid == MI_MEMID_OS) {
// was a direct OS allocation, pass through
if (!mi_cache_push(p, size, memid, is_committed, is_large, tld)) {
_mi_abandoned_await_readers(); // wait until safe to free
_mi_os_free_ex(p, size, is_committed, tld->stats);
}
}

View File

@ -76,9 +76,9 @@ static bool mi_heap_is_valid(mi_heap_t* heap) {
----------------------------------------------------------- */
typedef enum mi_collect_e {
NORMAL,
FORCE,
ABANDON
MI_NORMAL,
MI_FORCE,
MI_ABANDON
} mi_collect_t;
@ -87,12 +87,13 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
UNUSED(heap);
mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
mi_collect_t collect = *((mi_collect_t*)arg_collect);
_mi_page_free_collect(page, collect >= ABANDON);
_mi_page_free_collect(page, collect >= MI_FORCE);
if (mi_page_all_free(page)) {
// no more used blocks, free the page. TODO: should we retire here and be less aggressive?
_mi_page_free(page, pq, collect != NORMAL);
// no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
}
else if (collect == ABANDON) {
else if (collect == MI_ABANDON) {
// still used blocks but the thread is done; abandon the page
_mi_page_abandon(page, pq);
}
@ -111,61 +112,55 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
if (!mi_heap_is_initialized(heap)) return;
_mi_deferred_free(heap, collect > NORMAL);
_mi_deferred_free(heap, collect >= MI_FORCE);
// collect (some) abandoned pages
if (collect >= NORMAL && !heap->no_reclaim) {
if (collect == NORMAL) {
// this may free some segments (but also take ownership of abandoned pages)
_mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments);
}
else if (
#ifdef NDEBUG
collect == FORCE
#else
collect >= FORCE
#endif
&& _mi_is_main_thread() && mi_heap_is_backing(heap))
{
// the main thread is abandoned, try to free all abandoned segments.
// if all memory is freed by now, all segments should be freed.
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments);
}
// note: never reclaim on collect but leave it to threads that need storage to reclaim
if (
#ifdef NDEBUG
collect == MI_FORCE
#else
collect >= MI_FORCE
#endif
&& _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim)
{
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
// if all memory is freed by now, all segments should be freed.
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == ABANDON) {
//for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) {
// _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE
//}
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
}
// free thread delayed blocks.
// (if abandoning, after this there are no more local references into the pages.)
// (if abandoning, after this there are no more thread-delayed references into the pages.)
_mi_heap_delayed_free(heap);
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect segment caches
if (collect >= FORCE) {
if (collect >= MI_FORCE) {
_mi_segment_thread_collect(&heap->tld->segments);
}
#ifndef NDEBUG
// collect regions
if (collect >= FORCE && _mi_is_main_thread()) {
// _mi_mem_collect(&heap->tld->stats);
if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) {
//_mi_mem_collect(&heap->tld->os);
}
#endif
}
void _mi_heap_collect_abandon(mi_heap_t* heap) {
mi_heap_collect_ex(heap, ABANDON);
mi_heap_collect_ex(heap, MI_ABANDON);
}
void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept {
mi_heap_collect_ex(heap, (force ? FORCE : NORMAL));
mi_heap_collect_ex(heap, (force ? MI_FORCE : MI_NORMAL));
}
void mi_collect(bool force) mi_attr_noexcept {
@ -274,6 +269,9 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
page->used = 0;
// and free the page
// mi_page_free(page,false);
page->next = NULL;
page->prev = NULL;
_mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
return true; // keep going

View File

@ -165,6 +165,7 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
Initialization and freeing of the thread local heaps
----------------------------------------------------------- */
// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
@ -179,12 +180,13 @@ static bool _mi_heap_init(void) {
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
if (td == NULL) {
_mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n");
return false;
}
// OS allocated so already zero initialized
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
memcpy(tld, &tld_empty, sizeof(*tld));
@ -227,6 +229,7 @@ static bool _mi_heap_done(mi_heap_t* heap) {
// free if not the main thread
if (heap != &_mi_heap_main) {
mi_assert_internal(heap->tld->segments.count == 0);
_mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
}
#if (MI_DEBUG > 0)

View File

@ -56,10 +56,10 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense.
{ 0, UNINIT, MI_OPTION(eager_commit) }, // commit on demand
{ 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand?
#if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
{ 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else
{ 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED

View File

@ -396,7 +396,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
// On 64-bit systems, we can do efficient aligned allocation by using
// the 4TiB to 30TiB area to allocate them.
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
static volatile _Atomic(uintptr_t) aligned_base;
static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
// Return a 4MiB aligned address that is probably available
static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@ -904,7 +904,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static _Atomic(uintptr_t) mi_huge_start; // = 0
static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {

View File

@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta
}
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld);
#if (MI_DEBUG>=3)
static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
@ -127,12 +127,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
mi_thread_free_t tfreex;
mi_delayed_t old_delay;
do {
tfree = mi_atomic_read(&page->xthread_free);
tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS
tfreex = mi_tf_set_delayed(tfree, delay);
old_delay = mi_tf_delayed(tfree);
if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
// tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
}
else if (delay == old_delay) {
break; // avoid atomic operation if already equal
@ -140,7 +140,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
break; // leave never-delayed flag set
}
} while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
} while ((old_delay == MI_DELAYED_FREEING) ||
!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
}
/* -----------------------------------------------------------
@ -235,8 +236,8 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_assert_internal(!page->is_reset);
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
@ -244,11 +245,14 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
// allocate a fresh page from a segment
static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) return NULL;
mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
return NULL;
}
mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_page_init(heap, page, block_size, heap->tld);
_mi_stat_increase( &heap->tld->stats.pages, 1);
_mi_stat_increase(&heap->tld->stats.pages, 1);
if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
mi_assert_expensive(_mi_page_is_valid(page));
return page;
@ -257,19 +261,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
// Get a fresh page to use
static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
mi_assert_internal(mi_heap_contains_queue(heap, pq));
// try to reclaim an abandoned page first
mi_page_t* page = pq->first;
if (!heap->no_reclaim &&
_mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) &&
page != pq->first)
{
// we reclaimed, and we got lucky with a reclaimed page in our queue
page = pq->first;
if (page->free != NULL) return page;
}
// otherwise allocate the page
page = mi_page_fresh_alloc(heap, pq, pq->block_size);
mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size);
if (page==NULL) return NULL;
mi_assert_internal(pq->block_size==mi_page_block_size(page));
mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
@ -629,6 +621,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
#endif
page->is_zero = page->is_zero_init;
mi_assert_internal(page->is_committed);
mi_assert_internal(!page->is_reset);
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0);
@ -654,7 +648,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
-------------------------------------------------------------*/
// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq)
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
{
// search through the pages in "next fit" order
size_t count = 0;
@ -692,13 +686,16 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
if (page == NULL) {
_mi_heap_collect_retired(heap, false); // perhaps make a page available
page = mi_page_fresh(heap, pq);
if (page == NULL && first_try) {
// out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
page = mi_page_queue_find_free_ex(heap, pq, false);
}
}
else {
mi_assert(pq->first == page);
page->retire_expire = 0;
}
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
return page;
}
@ -722,7 +719,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
return page; // fast path
}
}
return mi_page_queue_find_free_ex(heap, pq);
return mi_page_queue_find_free_ex(heap, pq, true);
}

View File

@ -176,7 +176,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
return true;
}
*/
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__wasi__)
#include <stdlib.h>
@ -325,4 +325,4 @@ static void chacha_test(void)
chacha_block(&r);
mi_assert_internal(array_equals(r.output, r_out, 16));
}
*/
*/

View File

@ -57,7 +57,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map
#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#else
@ -72,14 +72,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, boo
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
// Region info is a pointer to the memory region and two bits for
// its flags: is_large, and is_committed.
// Region info
typedef union mi_region_info_u {
uintptr_t value;
uintptr_t value;
struct {
bool valid;
bool is_large;
short numa_node;
bool valid; // initialized?
bool is_large; // allocated in fixed large/huge OS pages
short numa_node; // the associated NUMA node (where -1 means no associated node)
} x;
} mi_region_info_t;
@ -87,13 +86,14 @@ typedef union mi_region_info_u {
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association)
volatile _Atomic(void*) start; // start of the memory area (and flags)
volatile _Atomic(uintptr_t) info; // mi_region_info_t.value
volatile _Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed))
mi_bitmap_field_t reset; // track reset per block
volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena-
mi_bitmap_field_t commit; // track if committed per block
mi_bitmap_field_t reset; // track if reset per block
volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
uintptr_t padding; // round to 8 fields
} mem_region_t;
// The region map
@ -188,6 +188,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement(&regions_count);
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
return false;
}
@ -239,11 +240,13 @@ static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large,
{
// try all regions for a free slot
const size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx];
// if this region suits our demand (numa node matches, large OS page matches)
if (mi_region_is_suitable(r, numa_node, allow_large)) {
// then try to atomically claim a segment(s) in this region
if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
@ -263,15 +266,15 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
// try to claim in existing regions
if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region
// otherwise try to allocate a fresh region and claim in there
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}
// found a region and claimed `blocks` at `bit_idx`
// ------------------------------------------------
// found a region and claimed `blocks` at `bit_idx`, initialize them now
mi_assert_internal(region != NULL);
mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
@ -346,25 +349,27 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
size = _mi_align_up(size, _mi_os_page_size());
// allocate from regions if possible
void* p = NULL;
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL) {
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
return p;
p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
if (p == NULL) {
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
if (p == NULL) {
// and otherwise fall back to the OS
p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
}
// and otherwise fall back to the OS
void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
if (p != NULL) {
mi_assert_internal((uintptr_t)p % alignment == 0);
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
#endif
}
return p;
}
@ -419,6 +424,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
bool any_unreset;
mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
if (any_unreset) {
_mi_abandoned_await_readers(); // ensure no more pending writes (in case reset = decommit)
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
}
}
@ -451,7 +457,8 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
memset(&regions[i], 0, sizeof(mem_region_t));
// and release the whole region
mi_atomic_write(&region->info, 0);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_abandoned_await_readers(); // ensure no pending reads
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
}
}

View File

@ -18,17 +18,16 @@ static void mi_segment_map_allocated_at(const mi_segment_t* segment);
static void mi_segment_map_freed_at(const mi_segment_t* segment);
static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats);
/* -----------------------------------------------------------
/* --------------------------------------------------------------------------------
Segment allocation
In any case the memory for a segment is virtual and only
committed on demand (i.e. we are careful to not touch the memory
until we actually allocate a block there)
In any case the memory for a segment is virtual and usually committed on demand.
(i.e. we are careful to not touch the memory until we actually allocate a block there)
If a thread ends, it "abandons" pages with used blocks
and there is an abandoned segment list whose segments can
be reclaimed by still running threads, much like work-stealing.
----------------------------------------------------------- */
-------------------------------------------------------------------------------- */
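
As a minimal standalone sketch of what "virtual and usually committed on demand" means (illustrative only, not code from this commit; `reserve` and `commit` are made-up names), on a POSIX system one can reserve address space without backing memory and commit sub-ranges just before first use:

#include <stddef.h>
#include <sys/mman.h>

// Reserve address space only: no physical memory or swap is committed yet.
static void* reserve(size_t size) {
  void* p = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return (p == MAP_FAILED ? NULL : p);
}

// Commit a sub-range just before it is first touched; returns 0 on success.
static int commit(void* p, size_t size) {
  return mprotect(p, size, PROT_READ | PROT_WRITE);
}

In the diff below, `mi_option_eager_commit` and `mi_option_eager_commit_delay` decide whether a fresh segment is committed up front or in this lazy fashion.
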
/* -----------------------------------------------------------
Slices
@ -119,6 +118,12 @@ static void mi_span_queue_delete(mi_span_queue_t* sq, mi_slice_t* slice) {
Invariant checking
----------------------------------------------------------- */
static bool mi_slice_is_used(const mi_slice_t* slice) {
return (slice->xblock_size > 0);
}
#if (MI_DEBUG>=3)
static bool mi_span_queue_contains(mi_span_queue_t* sq, mi_slice_t* slice) {
for (mi_slice_t* s = sq->first; s != NULL; s = s->next) {
@ -142,7 +147,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(slice->slice_offset == 0);
size_t index = mi_slice_index(slice);
size_t maxindex = (index + slice->slice_count >= segment->slice_entries ? segment->slice_entries : index + slice->slice_count) - 1;
if (slice->xblock_size > 0) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets
if (mi_slice_is_used(slice)) { // a page in use, we need at least MAX_SLICE_OFFSET valid back offsets
used_count++;
for (size_t i = 0; i <= MI_MAX_SLICE_OFFSET && index + i <= maxindex; i++) {
mi_assert_internal(segment->slices[index + i].slice_offset == i*sizeof(mi_slice_t));
@ -183,6 +188,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) {
static size_t mi_segment_size(mi_segment_t* segment) {
return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
}
static size_t mi_segment_info_size(mi_segment_t* segment) {
return segment->segment_info_slices * MI_SEGMENT_SLICE_SIZE;
}
@ -196,6 +202,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
uint8_t* p = (uint8_t*)segment + (idx*MI_SEGMENT_SLICE_SIZE);
/*
if (idx == 0) {
// the first page starts after the segment info (and possible guard page)
p += segment->segment_info_size;
psize -= segment->segment_info_size;
@ -461,9 +468,16 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st
mi_assert_internal(segment->decommit_mask == 0);
}
static bool mi_segment_is_abandoned(mi_segment_t* segment) {
return (segment->thread_id == 0);
}
// note: can be called on abandoned segments
static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
mi_assert_internal(slice_index < segment->slice_entries);
mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE ? NULL : mi_span_queue_for(slice_count,tld));
mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
? NULL : mi_span_queue_for(slice_count,tld));
if (slice_count==0) slice_count = 1;
mi_assert_internal(slice_index + slice_count - 1 < segment->slice_entries);
@ -487,6 +501,7 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size
else slice->xblock_size = 0; // mark huge page as free anyways
}
/*
// called from reclaim to add existing free spans
static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld) {
mi_segment_t* segment = _mi_ptr_segment(slice);
@ -494,6 +509,7 @@ static void mi_segment_span_add_free(mi_slice_t* slice, mi_segments_tld_t* tld)
size_t slice_index = mi_slice_index(slice);
mi_segment_span_free(segment,slice_index,slice->slice_count,tld);
}
*/
static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld_t* tld) {
mi_assert_internal(slice->slice_count > 0 && slice->slice_offset==0 && slice->xblock_size==0);
@ -502,12 +518,11 @@ static void mi_segment_span_remove_from_queue(mi_slice_t* slice, mi_segments_tld
mi_span_queue_delete(sq, slice);
}
// note: can be called on abandoned segments
static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_tld_t* tld) {
mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0 && slice->xblock_size > 0);
mi_assert_internal(slice != NULL && slice->slice_count > 0 && slice->slice_offset == 0);
mi_segment_t* segment = _mi_ptr_segment(slice);
mi_assert_internal(segment->used > 0);
segment->used--;
bool is_abandoned = mi_segment_is_abandoned(segment);
// for huge pages, just mark as free but don't add to the queues
if (segment->kind == MI_SEGMENT_HUGE) {
@ -524,7 +539,7 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
// free next block -- remove it from free and merge
mi_assert_internal(next->slice_count > 0 && next->slice_offset==0);
slice_count += next->slice_count; // extend
mi_segment_span_remove_from_queue(next, tld);
if (!is_abandoned) { mi_segment_span_remove_from_queue(next, tld); }
}
if (slice > segment->slices) {
mi_slice_t* prev = mi_slice_first(slice - 1);
@ -533,14 +548,13 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
// free previous slice -- remove it from free and merge
mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0);
slice_count += prev->slice_count;
mi_segment_span_remove_from_queue(prev, tld);
if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); }
slice = prev;
}
}
// and add the new free page
mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
return slice;
}
@ -592,6 +606,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
// ensure the memory is committed
mi_segment_ensure_committed(segment, _mi_page_start(segment,page,NULL), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
page->is_reset = false;
page->is_committed = true;
segment->used++;
return page;
}
@ -626,24 +641,25 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm
----------------------------------------------------------- */
// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
{
mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL));
mi_assert_internal((segment==NULL) || (segment!=NULL && required==0));
// calculate needed sizes first
size_t info_slices;
size_t pre_size;
size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
const size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
// Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little)
bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
const bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
bool commit = eager || (required > 0);
// Try to get from our cache first
mi_segment_t* segment = mi_segment_cache_pop(segment_slices, tld);
bool is_zero = false;
bool commit_info_still_good = (segment != NULL);
const bool commit_info_still_good = (segment != NULL);
if (segment==NULL) {
// Allocate the segment from the OS
bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
@ -660,8 +676,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
}
segment->memid = memid;
segment->mem_is_fixed = mem_large;
segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit;
segment->mem_is_committed = commit;
mi_segments_track_size((long)(segment_size), tld);
mi_segment_map_allocated_at(segment);
}
@ -719,10 +734,17 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
*huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices, tld);
}
mi_assert_expensive(mi_segment_is_valid(segment,tld));
return segment;
}
// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) {
return mi_segment_init(NULL, required, tld, os_tld, huge_page);
}
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
mi_assert_internal(segment != NULL);
mi_assert_internal(segment->next == NULL);
@ -756,31 +778,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
}
}
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
// find a free page
size_t page_size = _mi_align_up(required,(required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size);
mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed,tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld);
if (page==NULL) {
// no free page, allocate a new segment and try again
if (mi_segment_alloc(0, tld, os_tld, NULL) == NULL) return NULL; // OOM
return mi_segments_page_alloc(page_kind, required, tld, os_tld);
}
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
return page;
}
/* -----------------------------------------------------------
Page Free
@ -788,17 +785,18 @@ static mi_page_t* mi_segments_page_alloc(mi_page_kind_t page_kind, size_t requir
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
// note: can be called on abandoned pages
static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(page->xblock_size > 0);
mi_assert_internal(mi_page_all_free(page));
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment->used > 0);
size_t inuse = page->capacity * mi_page_block_size(page);
_mi_stat_decrease(&tld->stats->page_committed, inuse);
_mi_stat_decrease(&tld->stats->pages, 1);
// reset the page memory to reduce memory pressure?
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
size_t psize;
uint8_t* start = _mi_page_start(segment, page, &psize);
@ -813,7 +811,11 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
page->xblock_size = 1;
// and free it
return mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);
mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld);
segment->used--;
// cannot assert segment valid as it is called during reclaim
// mi_assert_expensive(mi_segment_is_valid(segment, tld));
return slice;
}
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
@ -825,6 +827,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
// mark it as free now
mi_segment_page_clear(page, tld);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (segment->used == 0) {
// no more used pages; remove from the free list and free the segment
@ -838,44 +841,175 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
/* -----------------------------------------------------------
Abandonment
Abandonment
When threads terminate, they can leave segments with
live blocks (reached through other threads). Such segments
are "abandoned" and will be reclaimed by other threads to
reuse their pages and/or free them eventually
We maintain a global list of abandoned segments that are
reclaimed on demand. Since this is shared among threads
the implementation needs to avoid the A-B-A problem on
popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem>
We use tagged pointers to avoid accidentally identifying
reused segments, much like stamped references in Java.
Secondly, we maintain a reader counter to avoid resetting
or decommitting segments that have a pending read operation.
Note: the current implementation is one possible design;
another way might be to keep track of abandoned segments
in the regions. This would have the advantage of keeping
all concurrent code in one place and not needing to deal
with ABA issues. The drawback is that it is unclear how to
scan abandoned segments efficiently in that case as they
would be spread among all other segments in the regions.
----------------------------------------------------------- */
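
To make the scheme above concrete, here is a rough standalone sketch (not code from this commit) of a tagged-pointer pop combined with a reader count; `node_t`, `top`, `readers`, and the 256-byte alignment assumption are illustrative only:

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define TAG_MASK ((uintptr_t)0xFF)      // low bits are free because nodes are assumed 256-byte aligned

typedef struct node_s { struct node_s* next; } node_t;

static _Atomic(uintptr_t) top;          // tagged pointer to the list head
static _Atomic(uintptr_t) readers;      // number of pops currently dereferencing a node

static node_t*   ptr_of(uintptr_t ts)                  { return (node_t*)(ts & ~TAG_MASK); }
static uintptr_t make_tagged(node_t* p, uintptr_t ts)  { return (uintptr_t)p | ((ts + 1) & TAG_MASK); }

static node_t* pop(void) {
  atomic_fetch_add(&readers, 1);        // announce a pending read of `next`
  uintptr_t ts = atomic_load(&top);
  node_t* n;
  uintptr_t next = 0;
  do {
    n = ptr_of(ts);
    if (n != NULL) next = make_tagged(n->next, ts);   // tag changes on every update: no A-B-A
  } while (n != NULL && !atomic_compare_exchange_weak(&top, &ts, next));
  atomic_fetch_sub(&readers, 1);
  return n;
}

// Called before resetting/decommitting memory that may still be read by a pop in flight.
static void await_readers(void) {
  while (atomic_load(&readers) != 0) { /* yield */ }
}

In the commit itself this role is played by `mi_abandoned_push`, `mi_abandoned_pop`, and `_mi_abandoned_await_readers` below, with the segment alignment (`MI_TAGGED_MASK` = `MI_SEGMENT_MASK`) providing the spare tag bits.
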
// When threads terminate, they can leave segments with
// live blocks (reached through other threads). Such segments
// are "abandoned" and will be reclaimed by other threads to
// reuse their pages and/or free them eventually
static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments
// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
// to put in a tag that increments on update to avoid the A-B-A problem.
#define MI_TAGGED_MASK MI_SEGMENT_MASK
typedef uintptr_t mi_tagged_segment_t;
// prepend a list of abandoned segments atomically to the global abandoned list; O(n)
static void mi_segments_prepend_abandoned(mi_segment_t* first) {
if (first == NULL) return;
static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
}
// first try if the abandoned list happens to be NULL
if (mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, NULL)) return;
static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
return ((uintptr_t)segment | tag);
}
// if not, find the end of the list
// This is a list of visited abandoned pages that were full at the time.
// this list migrates to `abandoned` when that becomes NULL. The use of
// this list reduces contention and the rate at which segments are visited.
static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
// The abandoned page list (tagged as it supports pop)
static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// We also maintain a count of current readers of the abandoned list
// in order to prevent resetting/decommitting segment memory if it might
// still be read.
static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
// Push on the visited list
static void mi_abandoned_visited_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_segment_t* anext;
do {
anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
segment->abandoned_next = anext;
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext));
}
// Move the visited list to the abandoned list.
static bool mi_abandoned_visited_revisit(void)
{
// quick check if the visited list is empty
if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
// grab the whole visited list
mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL);
if (first == NULL) return false;
// first try to swap directly if the abandoned list happens to be NULL
const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
mi_tagged_segment_t afirst;
if (mi_tagged_segment_ptr(ts)==NULL) {
afirst = mi_tagged_segment(first, ts);
if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true;
}
// find the last element of the visited list: O(n)
mi_segment_t* last = first;
while (last->abandoned_next != NULL) {
last = last->abandoned_next;
}
// and atomically prepend
mi_segment_t* next;
// and atomically prepend to the abandoned list
// (no need to increase the readers as we don't access the abandoned segments)
mi_tagged_segment_t anext;
do {
next = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned);
last->abandoned_next = next;
} while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned, first, next));
anext = mi_atomic_read_relaxed(&abandoned);
last->abandoned_next = mi_tagged_segment_ptr(anext);
afirst = mi_tagged_segment(first, anext);
} while (!mi_atomic_cas_weak(&abandoned, afirst, anext));
return true;
}
// Push on the abandoned list.
static void mi_abandoned_push(mi_segment_t* segment) {
mi_assert_internal(segment->thread_id == 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->next == NULL);
mi_assert_internal(segment->used > 0);
mi_tagged_segment_t ts;
mi_tagged_segment_t next;
do {
ts = mi_atomic_read_relaxed(&abandoned);
segment->abandoned_next = mi_tagged_segment_ptr(ts);
next = mi_tagged_segment(segment, ts);
} while (!mi_atomic_cas_weak(&abandoned, next, ts));
}
// Wait until there are no more pending reads on segments that used to be in the abandoned list
void _mi_abandoned_await_readers(void) {
uintptr_t n;
do {
n = mi_atomic_read(&abandoned_readers);
if (n != 0) mi_atomic_yield();
} while (n != 0);
}
// Pop from the abandoned list
static mi_segment_t* mi_abandoned_pop(void) {
mi_segment_t* segment;
// Check efficiently if it is empty (or if the visited list needs to be moved)
mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (mi_likely(segment == NULL)) {
if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
return NULL;
}
}
// Do a pop. We use a reader count to prevent
// a segment from being decommitted while a read is still pending,
// and a tagged pointer to prevent A-B-A link corruption.
// (this is called from `memory.c:_mi_mem_free` for example)
mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted
mi_tagged_segment_t next = 0;
do {
ts = mi_atomic_read_relaxed(&abandoned);
segment = mi_tagged_segment_ptr(ts);
if (segment != NULL) {
next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted
}
} while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts));
mi_atomic_decrement(&abandoned_readers); // release reader lock
if (segment != NULL) {
segment->abandoned_next = NULL;
}
return segment;
}
/* -----------------------------------------------------------
Abandon segment/page
----------------------------------------------------------- */
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0);
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_internal(segment->abandoned_visits == 0);
mi_assert_expensive(mi_segment_is_valid(segment,tld));
// remove the free pages from our lists
// remove the free pages from the free page queues
mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment);
while (slice < end) {
@ -896,8 +1030,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
segment->thread_id = 0;
segment->abandoned_next = NULL;
mi_segments_prepend_abandoned(segment); // prepend one-element list
mi_atomic_increment(&abandoned_count); // keep approximate count
segment->abandoned_visits = 1; // from 0 to 1 to signify it is abandoned
mi_abandoned_push(segment);
}
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -908,111 +1042,242 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_expensive(mi_segment_is_valid(segment,tld));
segment->abandoned++;
_mi_stat_increase(&tld->stats->pages_abandoned, 1);
mi_assert_internal(segment->abandoned <= segment->used);
if (segment->used == segment->abandoned) {
// all pages are abandoned, abandon the entire segment
mi_segment_abandon(segment,tld);
mi_segment_abandon(segment, tld);
}
}
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
// To avoid the A-B-A problem, grab the entire list atomically
mi_segment_t* segment = mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned); // pre-read to avoid expensive atomic operations
if (segment == NULL) return false;
segment = mi_atomic_exchange_ptr(mi_segment_t, &abandoned, NULL);
if (segment == NULL) return false;
/* -----------------------------------------------------------
Reclaim abandoned pages
----------------------------------------------------------- */
// we got a non-empty list
if (!try_all) {
// take at most 1/8th of the list and append the rest back to the abandoned list again
// this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem)
// and probably ok since the length will tend to be not too large.
uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated)
if (atmost < 8) atmost = 8; // but at least 8
static mi_slice_t* mi_slices_start_iterate(mi_segment_t* segment, const mi_slice_t** end) {
mi_slice_t* slice = &segment->slices[0];
*end = mi_segment_slices_end(segment);
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
return slice;
}
// find the split point
mi_segment_t* last = segment;
while (last->abandoned_next != NULL && atmost > 0) {
last = last->abandoned_next;
atmost--;
}
// split the list and push back the remaining segments
mi_segment_t* next = last->abandoned_next;
last->abandoned_next = NULL;
mi_segments_prepend_abandoned(next);
}
// reclaim all segments that we kept
while(segment != NULL) {
mi_segment_t* const next = segment->abandoned_next; // save the next segment
// got it.
mi_atomic_decrement(&abandoned_count);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
segment->abandoned_next = NULL;
segment->thread_id = _mi_thread_id();
mi_segments_track_size((long)mi_segment_size(segment),tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned,1);
//mi_assert_internal(segment->decommit_mask == 0);
mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment);
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (slice->xblock_size == 0) { // a free page, add it to our lists
mi_segment_span_add_free(slice,tld);
}
slice = slice + slice->slice_count;
}
slice = &segment->slices[0];
mi_assert_internal(slice->slice_count>0 && slice->xblock_size>0); // segment allocated page
slice = slice + slice->slice_count; // skip the first segment allocated page
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
mi_page_t* page = mi_slice_to_page(slice);
if (page->xblock_size > 0) { // a used page
// Possibly free pages and check if free space is available
static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, size_t block_size, mi_segments_tld_t* tld)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(mi_segment_is_abandoned(segment));
bool has_page = false;
// for all slices
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (mi_slice_is_used(slice)) { // used page
// ensure used count is up to date and collect potential concurrent frees
mi_page_t* const page = mi_slice_to_page(slice);
_mi_page_free_collect(page, false);
if (mi_page_all_free(page)) {
// if this page is all free now, free it without adding to any queues (yet)
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--;
// set the heap again and allow delayed free again
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
// if everything free by now, free the page
slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap,page);
slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalescing!
mi_assert_internal(!mi_slice_is_used(slice));
if (slice->slice_count >= slices_needed) {
has_page = true;
}
}
mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0);
slice = slice + slice->slice_count;
else {
if (page->xblock_size == block_size && mi_page_has_any_available(page)) {
// a page has available free blocks of the right size
has_page = true;
}
}
}
mi_assert(segment->abandoned == 0);
if (segment->used == 0) { // due to page_clear
mi_segment_free(segment,false,tld);
else {
// empty span
if (slice->slice_count >= slices_needed) {
has_page = true;
}
}
slice = slice + slice->slice_count;
}
return has_page;
}
// go on
segment = next;
// Reclaim an abandoned segment; returns NULL if the segment was freed
// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full.
static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) {
mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_expensive(mi_segment_is_valid(segment, tld));
if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; }
segment->thread_id = _mi_thread_id();
segment->abandoned_visits = 0;
mi_segments_track_size((long)mi_segment_size(segment), tld);
mi_assert_internal(segment->next == NULL);
_mi_stat_decrease(&tld->stats->segments_abandoned, 1);
// for all slices
const mi_slice_t* end;
mi_slice_t* slice = mi_slices_start_iterate(segment, &end);
while (slice < end) {
mi_assert_internal(slice->slice_count > 0);
mi_assert_internal(slice->slice_offset == 0);
if (mi_slice_is_used(slice)) {
// in use: reclaim the page in our heap
mi_page_t* page = mi_slice_to_page(slice);
mi_assert_internal(!page->is_reset);
mi_assert_internal(page->is_committed);
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_assert_internal(mi_page_heap(page) == NULL);
mi_assert_internal(page->next == NULL && page->prev==NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
segment->abandoned--;
// set the heap again and allow delayed free again
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
// if everything free by now, free the page
slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
}
else {
// otherwise reclaim it into the heap
_mi_page_reclaim(heap, page);
if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
}
}
}
else {
// the span is free, add it to our page queues
slice = mi_segment_span_free_coalesce(slice, tld); // set slice again due to coalescing
}
mi_assert_internal(slice->slice_count>0 && slice->slice_offset==0);
slice = slice + slice->slice_count;
}
return true;
mi_assert(segment->abandoned == 0);
if (segment->used == 0) { // due to page_clear
mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed));
mi_segment_free(segment, false, tld);
return NULL;
}
else {
return segment;
}
}
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
mi_segment_t* segment;
while ((segment = mi_abandoned_pop()) != NULL) {
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
}
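`_mi_abandoned_reclaim_all` drains the abandoned list into the given heap. A minimal user-level sketch of how abandonment can arise — assuming `<mimalloc.h>` and POSIX threads, with link flags such as `-lmimalloc -lpthread` as assumptions: a worker thread allocates and exits without freeing, so its segment is abandoned; the main thread later frees the block and forces a collection.

#include <mimalloc.h>
#include <pthread.h>
#include <stdio.h>

static void* worker(void* arg) {
  (void)arg;
  return mi_malloc(16 * 1024);   // freed later, by the main thread
}

int main(void) {
  pthread_t t;
  void* p = NULL;
  pthread_create(&t, NULL, &worker, NULL);
  pthread_join(t, &p);           // worker has exited; its segment is abandoned
  mi_free(p);                    // cross-thread free into the abandoned segment
  mi_collect(true /* force */);  // forced collection; in this branch, reclaiming
                                 // abandoned segments is expected along this path
  printf("done\n");
  return 0;
}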
static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld)
{
*reclaimed = false;
mi_segment_t* segment;
int max_tries = 8; // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
segment->abandoned_visits++;
bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees)
if (segment->used == 0) {
// free the segment (by forced reclaim) to make it available to other threads.
// note1: we prefer to free a segment as that might lead to reclaiming another
// segment that is still partially used.
// note2: we could in principle optimize this by skipping reclaim and directly
// freeing, but that would temporarily violate some invariants
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else if (has_page) {
// found a large enough free span, or a page of the right block_size with free space
// we return the result of reclaim (which is usually `segment`) as it might free
// the segment due to concurrent frees (in which case `NULL` is returned).
return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
}
else if (segment->abandoned_visits > 3) {
// always reclaim after more than 3 visits to limit the length of the abandoned queue.
mi_segment_reclaim(segment, heap, 0, NULL, tld);
}
else {
// otherwise, push it on the visited list so it does not get looked at again too soon
mi_abandoned_visited_push(segment);
}
}
return NULL;
}
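`mi_abandoned_pop` and `mi_abandoned_visited_push` are defined outside this hunk. The comment earlier in this section mentions avoiding the A-B-A problem; a common way to do that for a pop on a lock-free list is to pair the head pointer with a version tag and update both in a single compare-and-swap. A self-contained sketch under that assumption — not the mimalloc implementation; `seg_t`, `tagged_t` are illustrative, the sketch assumes nodes are not freed while on the list, and struct atomics may require linking libatomic on some targets:

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

typedef struct seg_s { struct seg_s* next; } seg_t;
typedef struct { seg_t* ptr; uintptr_t tag; } tagged_t;

static _Atomic tagged_t abandoned_head;               // {NULL, 0} at start

static seg_t* tagged_pop(void) {
  tagged_t old = atomic_load(&abandoned_head);
  for (;;) {
    if (old.ptr == NULL) return NULL;                 // list is empty
    tagged_t next = { old.ptr->next, old.tag + 1 };   // bump the tag on every change
    if (atomic_compare_exchange_weak(&abandoned_head, &old, next)) {
      old.ptr->next = NULL;
      return old.ptr;                                 // we own the popped node
    }
    // CAS failed: `old` was refreshed with the current head; retry
  }
}

int main(void) {
  seg_t s = { NULL };
  tagged_t one = { &s, 1 };
  atomic_store(&abandoned_head, one);                 // pretend one segment was abandoned
  return (tagged_pop() == &s) ? 0 : 1;
}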
/* -----------------------------------------------------------
Reclaim or allocate
----------------------------------------------------------- */
static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);
// 1. try to get a segment from our cache
mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
if (segment != NULL) {
mi_segment_init(segment, 0, tld, os_tld, NULL);
return segment;
}
// 2. try to reclaim an abandoned segment
bool reclaimed;
segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
if (reclaimed) {
// reclaimed a page of the right block size directly into the heap
mi_assert_internal(segment != NULL);
return NULL; // pretend out-of-memory: the reclaimed page is already in one of the heap's page queues and has blocks available
}
else if (segment != NULL) {
// reclaimed a segment with a large enough empty span in it
return segment;
}
// 3. otherwise allocate a fresh segment
return mi_segment_alloc(0, tld, os_tld, NULL);
}
/* -----------------------------------------------------------
Page allocation
----------------------------------------------------------- */
static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_kind, size_t required, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(required <= MI_LARGE_OBJ_SIZE_MAX && page_kind <= MI_PAGE_LARGE);
// find a free page
size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE));
size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE;
mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size);
mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld);
if (page==NULL) {
// no free page, allocate a new segment and try again
if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) {
// OOM or reclaimed a good page in the heap
return NULL;
}
else {
// otherwise try again
return mi_segments_page_alloc(heap, page_kind, required, block_size, tld, os_tld);
}
}
mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size);
mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id());
mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats);
return page;
}
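For reference, the rounding above can be checked with a small standalone computation; the 64 KiB slice size and 512 KiB medium-page size below are assumed example values, not necessarily the real `MI_SEGMENT_SLICE_SIZE` / `MI_MEDIUM_PAGE_SIZE`:

#include <assert.h>
#include <stddef.h>

static size_t align_up(size_t sz, size_t alignment) {
  return ((sz + alignment - 1) / alignment) * alignment;
}

int main(void) {
  const size_t slice_size  = 64 * 1024;    // assumed slice granularity
  const size_t medium_page = 512 * 1024;   // assumed medium page size
  size_t required  = 200 * 1024;           // e.g. a 200 KiB large-page request
  size_t page_size = align_up(required, required > medium_page ? medium_page : slice_size);
  size_t slices_needed = page_size / slice_size;
  assert(slices_needed * slice_size == page_size);
  assert(slices_needed == 4);              // 200 KiB rounds up to 4 x 64 KiB
  return 0;
}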
/* -----------------------------------------------------------
Huge page allocation
----------------------------------------------------------- */
@ -1031,16 +1296,16 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
/* -----------------------------------------------------------
Page allocation and free
----------------------------------------------------------- */
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_SMALL,block_size,tld,os_tld);
page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld);
}
else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,tld, os_tld);
page = mi_segments_page_alloc(heap,MI_PAGE_MEDIUM,MI_MEDIUM_PAGE_SIZE,block_size,tld, os_tld);
}
else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(MI_PAGE_LARGE,block_size,tld, os_tld);
page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld);
}
else {
page = mi_segment_huge_page_alloc(block_size,tld,os_tld);

View File

@ -18,6 +18,10 @@ terms of the MIT license. A copy of the license can be found in the file
#include "os.c"
//#include "memory.c"
#include "arena.c"
#include "region.c"
#include "segment.c"
#include "page.c"
#include "heap.c"

View File

@ -32,16 +32,18 @@ static int ITER = 50; // N full iterations destructing and re-creating a
// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor
#define STRESS // undefine for leak test
static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC
#define custom_malloc(s) malloc(s)
#define custom_calloc(n,s) calloc(n,s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#define custom_malloc(s) mi_malloc(s)
#define custom_calloc(n,s) mi_calloc(n,s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
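The `custom_*` macros make it easy to point the stress test at either allocator. A possible build invocation — paths and flags are assumptions, not taken from this commit:

cc -O2 -I include -o test-stress test/test-stress.c -L . -lmimalloc -lpthread
cc -O2 -DUSE_STD_MALLOC -o test-stress test/test-stress.c -lpthread

The second form routes `custom_malloc`/`custom_calloc`/`custom_free` through the system allocator for comparison.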
@ -94,9 +96,12 @@ static void* alloc_items(size_t items, random_t r) {
}
if (items == 40) items++; // pthreads uses that size for stack increases
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t));
if (items==0) items = 1;
uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
for (uintptr_t i = 0; i < items; i++) {
p[i] = (items - i) ^ cookie;
}
}
return p;
}
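The `(items - i) ^ cookie` encoding above lets a later pass recover the item count from the first word and detect corruption. A tiny standalone illustration of that round-trip; the `cookie` value here is just an example constant (the test's own `cookie` is defined earlier in the file, not shown here):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void) {
  const uintptr_t cookie = 0xbf58476d1ce4e5b9ULL;    // example value only
  const uintptr_t items  = 8;
  uintptr_t* p = (uintptr_t*)calloc(items, sizeof(uintptr_t));
  if (p == NULL) return 1;
  for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
  uintptr_t n = p[0] ^ cookie;                       // recover the item count
  assert(n == items);
  for (uintptr_t i = 0; i < n; i++) assert((p[i] ^ cookie) == n - i);
  free(p);
  return 0;
}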
@ -118,7 +123,7 @@ static void free_items(void* p) {
static void stress(intptr_t tid) {
//bench_start_thread();
uintptr_t r = tid * 43;
uintptr_t r = (tid * 43); // rand();
const size_t max_item_shift = 5; // 128
const size_t max_item_retained_shift = max_item_shift + 2;
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
@ -126,7 +131,7 @@ static void stress(intptr_t tid) {
void** data = NULL;
size_t data_size = 0;
size_t data_top = 0;
void** retained = (void**)custom_malloc(retain * sizeof(void*));
void** retained = (void**)custom_calloc(retain,sizeof(void*));
size_t retain_top = 0;
while (allocs > 0 || retain > 0) {
@ -171,7 +176,46 @@ static void stress(intptr_t tid) {
//bench_end_thread();
}
static void run_os_threads(size_t nthreads);
static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
static void test_stress(void) {
uintptr_t r = rand();
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &stress);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
}
#ifndef STRESS
static void leak(intptr_t tid) {
uintptr_t r = rand();
void* p = alloc_items(1 /*pick(&r)%128*/, &r);
if (chance(50, &r)) {
intptr_t i = (pick(&r) % TRANSFERS);
void* q = atomic_exchange_ptr(&transfer[i], p);
free_items(q);
}
}
static void test_leak(void) {
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS, &leak);
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
}
#endif
int main(int argc, char** argv) {
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
@ -197,21 +241,13 @@ int main(int argc, char** argv) {
//bench_start_program();
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
srand(0x7feb352d);
mi_stats_reset();
uintptr_t r = 43 * 43;
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}
#ifdef STRESS
test_stress();
#else
test_leak();
#endif
mi_collect(true);
mi_stats_print(NULL);
@ -220,18 +256,21 @@ int main(int argc, char** argv) {
}
static void (*thread_entry_fun)(intptr_t) = &stress;
#ifdef _WIN32
#include <windows.h>
static DWORD WINAPI thread_entry(LPVOID param) {
stress((intptr_t)param);
static DWORD WINAPI thread_entry(LPVOID param) {
thread_entry_fun((intptr_t)param);
return 0;
}
static void run_os_threads(size_t nthreads) {
DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE));
static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
thread_entry_fun = fun;
DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
for (uintptr_t i = 0; i < nthreads; i++) {
thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
}
@ -246,7 +285,7 @@ static void run_os_threads(size_t nthreads) {
}
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#if (INTPTR_MAX == UINT32_MAX)
#if (INTPTR_MAX == INT32_MAX)
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
#else
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
@ -257,12 +296,13 @@ static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#include <pthread.h>
static void* thread_entry(void* param) {
stress((uintptr_t)param);
thread_entry_fun((uintptr_t)param);
return NULL;
}
static void run_os_threads(size_t nthreads) {
pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t));
static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
thread_entry_fun = fun;
pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t) * nthreads);
//pthread_setconcurrency(nthreads);
for (uintptr_t i = 0; i < nthreads; i++) {
@ -277,12 +317,12 @@ static void run_os_threads(size_t nthreads) {
#ifdef __cplusplus
#include <atomic>
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
return std::atomic_exchange_explicit((volatile std::atomic<void*>*)p, newval, std::memory_order_acquire);
return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
}
#else
#include <stdatomic.h>
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire);
return atomic_exchange((volatile _Atomic(void*)*)p, newval);
}
#endif
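The test uses `atomic_exchange` on the `transfer` slots so that exactly one thread ends up owning (and freeing) each published pointer. A self-contained illustration of that idiom with plain C11 `<stdatomic.h>`; the `slot`/`publish` names are made up for the example:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic(void*) slot;

static void publish(void* p) {
  void* old = atomic_exchange(&slot, p);  // put p in, take the old value out
  free(old);                              // free(NULL) is a no-op
}

int main(void) {
  publish(malloc(32));                    // first publish: nothing to free yet
  publish(malloc(64));                    // second publish: frees the 32-byte block
  free(atomic_exchange(&slot, NULL));     // drain the slot at the end
  printf("slot drained\n");
  return 0;
}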