enable more reset delay slots

This commit is contained in:
daan 2019-11-11 17:06:16 -08:00
parent 5e6754f3f7
commit a0958b2da6
5 changed files with 66 additions and 32 deletions

View File

@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
// ------------------------------------------------------ // ------------------------------------------------------
typedef int64_t mi_msecs_t; typedef int64_t mi_msecs_t;
#define MI_RESET_DELAY_SLOTS (256)
typedef struct mi_delay_slot_s { typedef struct mi_delay_slot_s {
mi_msecs_t expire; mi_msecs_t expire;
uint8_t* addr; uint8_t* addr;
size_t size; size_t size;
} mi_delay_slot_t; } mi_delay_slot_t;
#define MI_RESET_DELAY_SLOTS (128) typedef struct mi_delay_slots_s {
size_t capacity; // always `MI_RESET_DELAY_SLOTS`
size_t count; // current slots used (`<= capacity`)
mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS];
} mi_delay_slots_t;
// ------------------------------------------------------ // ------------------------------------------------------
// Thread Local data // Thread Local data
@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s {
// OS thread local data // OS thread local data
typedef struct mi_os_tld_s { typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation size_t region_idx; // start point for next allocation
mi_stats_t* stats; // points to tld stats mi_delay_slots_t* reset_delay; // delay slots for OS reset operations
mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS]; mi_stats_t* stats; // points to tld stats
} mi_os_tld_t; } mi_os_tld_t;
// Segments thread local data // Segments thread local data

View File

@ -100,8 +100,8 @@ static mi_tld_t tld_main = {
0, false, 0, false,
&_mi_heap_main, &_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments
{ 0, tld_main_stats, {{0,NULL,0}} }, // os { 0, NULL, tld_main_stats }, // os
{ MI_STATS_NULL } // stats { MI_STATS_NULL } // stats
}; };
mi_heap_t _mi_heap_main = { mi_heap_t _mi_heap_main = {
@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
typedef struct mi_thread_data_s { typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld; mi_tld_t tld;
mi_delay_slots_t reset_delay;
} mi_thread_data_t; } mi_thread_data_t;
// Initialize the thread local default heap, called from `mi_thread_init` // Initialize the thread local default heap, called from `mi_thread_init`
@ -211,6 +212,7 @@ static bool _mi_heap_init(void) {
} }
mi_tld_t* tld = &td->tld; mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap; mi_heap_t* heap = &td->heap;
mi_delay_slots_t* reset_delay = &td->reset_delay;
memcpy(heap, &_mi_heap_empty, sizeof(*heap)); memcpy(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id(); heap->thread_id = _mi_thread_id();
heap->random = _mi_random_init(heap->thread_id); heap->random = _mi_random_init(heap->thread_id);
@ -221,6 +223,9 @@ static bool _mi_heap_init(void) {
tld->segments.stats = &tld->stats; tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os; tld->segments.os = &tld->os;
tld->os.stats = &tld->stats; tld->os.stats = &tld->stats;
tld->os.reset_delay = reset_delay;
memset(reset_delay, 0, sizeof(*reset_delay));
reset_delay->capacity = MI_RESET_DELAY_SLOTS;
_mi_heap_default = heap; _mi_heap_default = heap;
} }
return false; return false;

View File

@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
// local // local
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size); static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size);
// Constants // Constants
@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
Try to claim blocks in suitable regions Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) { static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
// initialized at all? // initialized at all?
mi_region_info_t info = mi_atomic_read_relaxed(&region->info); mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
if (info==0) return false; if (info==0) return false;
@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
} }
static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{ {
// try all regions for a free slot // try all regions for a free slot
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me
for (size_t visited = 0; visited < count; visited++, idx++) { for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx]; mem_region_t* r = &regions[idx];
if (mi_region_is_suitable(r, numa_node, commit, allow_large)) { if (mi_region_is_suitable(r, numa_node, allow_large)) {
if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) { if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position tld->region_idx = idx; // remember the last found position
*region = r; *region = r;
@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
mem_region_t* region; mem_region_t* region;
mi_bitmap_index_t bit_idx; mi_bitmap_index_t bit_idx;
// first try to claim in existing regions // first try to claim in existing regions
if (!mi_region_try_claim(blocks, *commit, *is_large, &region, &bit_idx, tld)) { if (!mi_region_try_claim(blocks, *is_large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region // otherwise try to allocate a fresh region
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) { if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// out of regions or memory // out of regions or memory
@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
if (p==NULL) return; if (p==NULL) return;
if (size==0) return; if (size==0) return;
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); mi_delay_remove(tld->reset_delay, p, size);
size_t arena_memid = 0; size_t arena_memid = 0;
mi_bitmap_index_t bit_idx; mi_bitmap_index_t bit_idx;
@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
bool is_eager_committed; bool is_eager_committed;
void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed); void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE); mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE);
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats); _mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
} }
// and release // and release
@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg); typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg);
static void mi_delay_insert(mi_delay_slot_t* slots, size_t count, static void mi_delay_insert(mi_delay_slots_t* ds,
mi_msecs_t delay, uint8_t* addr, size_t size, mi_msecs_t delay, uint8_t* addr, size_t size,
mi_delay_resolve_fun* resolve, void* arg) mi_delay_resolve_fun* resolve, void* arg)
{ {
if (delay==0) { if (ds == NULL || delay==0 || addr==NULL || size==0) {
resolve(addr, size, arg); resolve(addr, size, arg);
return; return;
} }
mi_msecs_t now = _mi_clock_now(); mi_msecs_t now = _mi_clock_now();
mi_delay_slot_t* oldest = slots; mi_delay_slot_t* oldest = &ds->slots[0];
// walk through all slots, resolving expired ones. // walk through all slots, resolving expired ones.
// remember the oldest slot to insert the new entry in. // remember the oldest slot to insert the new entry in.
for (size_t i = 0; i < count; i++) { size_t newcount = 0;
mi_delay_slot_t* slot = &slots[i]; for (size_t i = 0; i < ds->count; i++) {
mi_delay_slot_t* slot = &ds->slots[i];
if (slot->expire == 0) { if (slot->expire == 0) {
// empty slot // empty slot
@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count,
} }
else if (oldest->expire > slot->expire) { else if (oldest->expire > slot->expire) {
oldest = slot; oldest = slot;
newcount = i+1;
}
else {
newcount = i+1;
} }
} }
ds->count = newcount;
if (delay>0) { if (delay>0) {
// not yet registered, use the oldest slot // not yet registered, use the oldest slot (or a new one if there is space)
if (oldest->expire > 0) { if (ds->count < ds->capacity) {
oldest = &ds->slots[ds->count];
ds->count++;
}
else if (oldest->expire > 0) {
resolve(oldest->addr, oldest->size, arg); // evict if not empty resolve(oldest->addr, oldest->size, arg); // evict if not empty
} }
mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count);
oldest->expire = now + delay; oldest->expire = now + delay;
oldest->addr = addr; oldest->addr = addr;
oldest->size = size; oldest->size = size;
} }
} }
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size) static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size)
{ {
if (ds == NULL || p==NULL || size==0) return false;
uint8_t* addr = (uint8_t*)p; uint8_t* addr = (uint8_t*)p;
bool done = false; bool done = false;
// walk through all slots size_t newcount = 0;
for (size_t i = 0; i < count; i++) {
mi_delay_slot_t* slot = &slots[i]; // walk through all valid slots
for (size_t i = 0; i < ds->count; i++) {
mi_delay_slot_t* slot = &ds->slots[i];
if (slot->addr <= addr && slot->addr + slot->size >= addr + size) { if (slot->addr <= addr && slot->addr + slot->size >= addr + size) {
// earlier slot encompasses the area; remove it // earlier slot encompasses the area; remove it
slot->expire = 0; slot->expire = 0;
@ -510,12 +525,17 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_
slot->expire = 0; slot->expire = 0;
} }
else if ((addr <= slot->addr && addr + size > slot->addr) || else if ((addr <= slot->addr && addr + size > slot->addr) ||
(addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) { (addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) {
// partial overlap, remove slot // partial overlap
mi_assert_internal(false); // can happen with a large object spanning onto some partial end block
// mi_assert_internal(false);
slot->expire = 0; slot->expire = 0;
} }
else {
newcount = i + 1;
}
} }
ds->count = newcount;
return done; return done;
} }
@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) {
} }
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay), mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay),
(uint8_t*)p, size, &mi_resolve_reset, tld); (uint8_t*)p, size, &mi_resolve_reset, tld);
return true; return true;
} }
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) { if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) {
return _mi_os_unreset(p, size, is_zero, tld->stats); return _mi_os_unreset(p, size, is_zero, tld->stats);
} }
return true; return true;
@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); mi_delay_remove(tld->reset_delay,p, size);
return _mi_os_commit(p, size, is_zero, tld->stats); return _mi_os_commit(p, size, is_zero, tld->stats);
} }
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size); mi_delay_remove(tld->reset_delay, p, size);
return _mi_os_decommit(p, size, tld->stats); return _mi_os_decommit(p, size, tld->stats);
} }

View File

@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free { 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds

View File

@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg
_mi_stat_decrease(&tld->stats->pages, 1); _mi_stat_decrease(&tld->stats->pages, 1);
// reset the page memory to reduce memory pressure? // reset the page memory to reduce memory pressure?
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset))
// && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets
{
size_t psize; size_t psize;
uint8_t* start = _mi_page_start(segment, page, &psize); uint8_t* start = _mi_page_start(segment, page, &psize);
page->is_reset = true; page->is_reset = true;