From 7bef5f5f5b2cde374e1ac8b38f708de88c2dee52 Mon Sep 17 00:00:00 2001
From: daan
Date: Mon, 31 Oct 2022 10:34:55 -0700
Subject: [PATCH] reduce contention on the delayed_free lock; see issue #630

---
 include/mimalloc-internal.h |  5 +++--
 src/alloc.c                 |  5 ++++-
 src/heap.c                  |  8 ++++----
 src/page.c                  | 26 +++++++++++++++++++++-----
 4 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 7f16544b..eecfea0d 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -113,10 +113,11 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; /
 void _mi_page_unfull(mi_page_t* page);
 void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
 void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
-void _mi_heap_delayed_free(mi_heap_t* heap);
+void _mi_heap_delayed_free_all(mi_heap_t* heap);
+bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
 void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
 
-void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
+bool _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
 void _mi_deferred_free(mi_heap_t* heap, bool force);
 
diff --git a/src/alloc.c b/src/alloc.c
index fc613187..44c0bf8b 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -518,6 +518,7 @@ void mi_free(void* p) mi_attr_noexcept
   }
 }
 
+// return true if successful
 bool _mi_free_delayed_block(mi_block_t* block) {
   // get segment and page
   const mi_segment_t* const segment = _mi_ptr_segment(block);
@@ -530,7 +531,9 @@ bool _mi_free_delayed_block(mi_block_t* block) {
   // some blocks may end up in the page `thread_free` list with no blocks in the
   // heap `thread_delayed_free` list which may cause the page to be never freed!
   // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
-  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */);
+  if (!_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
+    return false;
+  }
 
   // collect all other non-local frees to ensure up-to-date `used` count
   _mi_page_free_collect(page, false);
diff --git a/src/heap.c b/src/heap.c
index 45cb14c1..c36a9616 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -136,9 +136,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
     mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
   }
 
-  // free thread delayed blocks.
+  // free all current thread delayed blocks.
   // (if abandoning, after this there are no more thread-delayed references into the pages.)
-  _mi_heap_delayed_free(heap);
+  _mi_heap_delayed_free_all(heap);
 
   // collect retired pages
   _mi_heap_collect_retired(heap, collect >= MI_FORCE);
@@ -339,7 +339,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   if (from==NULL || from->page_count == 0) return;
 
   // reduce the size of the delayed frees
-  _mi_heap_delayed_free(from);
+  _mi_heap_delayed_free_partial(from);
 
   // transfer all pages by appending the queues; this will set a new heap field
   // so threads may do delayed frees in either heap for a while.
@@ -358,7 +358,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
   // note: be careful here as the `heap` field in all those pages no longer point to `from`,
   // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a
   // the regular `_mi_free_delayed_block` which is safe.
-  _mi_heap_delayed_free(from);
+  _mi_heap_delayed_free_all(from);
   #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
   mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
   #endif
diff --git a/src/page.c b/src/page.c
index bb74c600..f09753d6 100644
--- a/src/page.c
+++ b/src/page.c
@@ -122,15 +122,18 @@ bool _mi_page_is_valid(mi_page_t* page) {
   }
 #endif
 
-void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
+bool _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
   mi_thread_free_t tfreex;
   mi_delayed_t old_delay;
   mi_thread_free_t tfree;
+  size_t yield_count = 0;
   do {
     tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
     tfreex = mi_tf_set_delayed(tfree, delay);
     old_delay = mi_tf_delayed(tfree);
     if mi_unlikely(old_delay == MI_DELAYED_FREEING) {
+      if (yield_count >= 4) return false;  // give up after 4 tries
+      yield_count++;
       mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
       // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
     }
@@ -142,6 +145,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
     }
   } while ((old_delay == MI_DELAYED_FREEING) ||
            !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
+
+  return true; // success
 }
 
 /* -----------------------------------------------------------
@@ -272,10 +277,18 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
   Do any delayed frees
   (put there by other threads if they deallocated in a full page)
 ----------------------------------------------------------- */
-void _mi_heap_delayed_free(mi_heap_t* heap) {
+void _mi_heap_delayed_free_all(mi_heap_t* heap) {
+  while (!_mi_heap_delayed_free_partial(heap)) {
+    mi_atomic_yield();
+  }
+}
+
+// returns true if all delayed frees were processed
+bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
   // take over the list (note: no atomic exchange since it is often NULL)
   mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
   while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
+  bool all_freed = true;
 
   // and free them all
   while(block != NULL) {
@@ -283,7 +296,9 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
     // use internal free instead of regular one to keep stats etc correct
     if (!_mi_free_delayed_block(block)) {
       // we might already start delayed freeing while another thread has not yet
-      // reset the delayed_freeing flag; in that case delay it further by reinserting.
+      // reset the delayed_freeing flag; in that case delay it further by reinserting the current block
+      // into the delayed free list
+      all_freed = false;
       mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
       do {
         mi_block_set_nextx(heap, block, dfree, heap->keys);
@@ -291,6 +306,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
     }
     block = next;
   }
+  return all_freed;
 }
 
 /* -----------------------------------------------------------
@@ -832,8 +848,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexce
   // call potential deferred free routines
   _mi_deferred_free(heap, false);
 
-  // free delayed frees from other threads
-  _mi_heap_delayed_free(heap);
+  // free delayed frees from other threads (but skip contended ones)
+  _mi_heap_delayed_free_partial(heap);
 
   // find (or allocate) a page of the right size
   mi_page_t* page = mi_find_page(heap, size);
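
The patch splits `_mi_heap_delayed_free` into an `_all` variant (used on heap collection, where completeness matters) and a best-effort `_partial` variant (used on the hot allocation path in `_mi_malloc_generic`), and lets `_mi_page_use_delayed_free` give up after a few yields instead of spinning. The following standalone C11 sketch illustrates the same take-over/reinsert pattern on an atomic singly-linked list; it is not part of the patch, and `heap_t`, `block_t`, `try_free_block`, and the `heap_delayed_free_*` functions are illustrative stand-ins for mimalloc's internals, with `try_free_block` stubbed out so the example compiles and runs on its own (POSIX `sched_yield` stands in for `mi_atomic_yield`).

/* Sketch only: simplified model of the partial/all delayed-free split. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sched.h>

typedef struct block_s {
  struct block_s* next;
} block_t;

typedef struct heap_s {
  _Atomic(block_t*) thread_delayed_free;  /* list filled by other threads */
} heap_t;

/* Stand-in for _mi_free_delayed_block: in mimalloc this returns false while the
   owning page is still in its MI_DELAYED_FREEING transition; here we simply
   free the block so the sketch is self-contained. */
static bool try_free_block(block_t* block) {
  free(block);
  return true;
}

/* Partial pass: take over the list, free what we can, and push back any block
   that is still contended. Never spins; returns true only if everything was freed. */
static bool heap_delayed_free_partial(heap_t* heap) {
  /* take over the list (plain load first since it is often NULL) */
  block_t* block = atomic_load_explicit(&heap->thread_delayed_free, memory_order_relaxed);
  while (block != NULL &&
         !atomic_compare_exchange_weak_explicit(&heap->thread_delayed_free, &block, NULL,
                                                memory_order_acq_rel, memory_order_acquire)) {
    /* retry with the freshly observed head */
  }
  bool all_freed = true;
  while (block != NULL) {
    block_t* next = block->next;
    if (!try_free_block(block)) {
      all_freed = false;  /* still contended: reinsert so a later pass retries it */
      block_t* head = atomic_load_explicit(&heap->thread_delayed_free, memory_order_relaxed);
      do {
        block->next = head;
      } while (!atomic_compare_exchange_weak_explicit(&heap->thread_delayed_free, &head, block,
                                                      memory_order_release, memory_order_relaxed));
    }
    block = next;
  }
  return all_freed;
}

/* Full pass: used where completeness matters more than latency, so it yields
   and retries until the partial pass reports that the list was fully drained. */
static void heap_delayed_free_all(heap_t* heap) {
  while (!heap_delayed_free_partial(heap)) {
    sched_yield();
  }
}

int main(void) {
  heap_t heap;
  atomic_init(&heap.thread_delayed_free, NULL);
  /* simulate another thread having queued one delayed block */
  block_t* b = malloc(sizeof(block_t));
  if (b == NULL) return 1;
  b->next = NULL;
  atomic_store_explicit(&heap.thread_delayed_free, b, memory_order_release);
  heap_delayed_free_all(&heap);  /* drains the list, yielding only while contended */
  return 0;
}

The design point mirrored here is that the allocation path only ever calls the partial pass, so a block whose page is momentarily in the freeing transition is simply re-queued instead of being waited on, which is what reduces the contention reported in issue #630.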