enable more reset delay slots
This commit is contained in:
parent
5e6754f3f7
commit
a0958b2da6
@ -390,13 +390,20 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
|
|||||||
// ------------------------------------------------------
|
// ------------------------------------------------------
|
||||||
typedef int64_t mi_msecs_t;
|
typedef int64_t mi_msecs_t;
|
||||||
|
|
||||||
|
#define MI_RESET_DELAY_SLOTS (256)
|
||||||
|
|
||||||
typedef struct mi_delay_slot_s {
|
typedef struct mi_delay_slot_s {
|
||||||
mi_msecs_t expire;
|
mi_msecs_t expire;
|
||||||
uint8_t* addr;
|
uint8_t* addr;
|
||||||
size_t size;
|
size_t size;
|
||||||
} mi_delay_slot_t;
|
} mi_delay_slot_t;
|
||||||
|
|
||||||
#define MI_RESET_DELAY_SLOTS (128)
|
typedef struct mi_delay_slots_s {
|
||||||
|
size_t capacity; // always `MI_RESET_DELAY_SLOTS`
|
||||||
|
size_t count; // current slots used (`<= capacity`)
|
||||||
|
mi_delay_slot_t slots[MI_RESET_DELAY_SLOTS];
|
||||||
|
} mi_delay_slots_t;
|
||||||
|
|
||||||
|
|
||||||
// ------------------------------------------------------
|
// ------------------------------------------------------
|
||||||
// Thread Local data
|
// Thread Local data
|
||||||
@ -411,8 +418,8 @@ typedef struct mi_segment_queue_s {
|
|||||||
// OS thread local data
|
// OS thread local data
|
||||||
typedef struct mi_os_tld_s {
|
typedef struct mi_os_tld_s {
|
||||||
size_t region_idx; // start point for next allocation
|
size_t region_idx; // start point for next allocation
|
||||||
|
mi_delay_slots_t* reset_delay; // delay slots for OS reset operations
|
||||||
mi_stats_t* stats; // points to tld stats
|
mi_stats_t* stats; // points to tld stats
|
||||||
mi_delay_slot_t reset_delay[MI_RESET_DELAY_SLOTS];
|
|
||||||
} mi_os_tld_t;
|
} mi_os_tld_t;
|
||||||
|
|
||||||
// Segments thread local data
|
// Segments thread local data
|
||||||
|
@ -100,7 +100,7 @@ static mi_tld_t tld_main = {
|
|||||||
0, false,
|
0, false,
|
||||||
&_mi_heap_main,
|
&_mi_heap_main,
|
||||||
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments
|
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats, tld_main_os }, // segments
|
||||||
{ 0, tld_main_stats, {{0,NULL,0}} }, // os
|
{ 0, NULL, tld_main_stats }, // os
|
||||||
{ MI_STATS_NULL } // stats
|
{ MI_STATS_NULL } // stats
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -192,6 +192,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
|
|||||||
typedef struct mi_thread_data_s {
|
typedef struct mi_thread_data_s {
|
||||||
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
|
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
|
||||||
mi_tld_t tld;
|
mi_tld_t tld;
|
||||||
|
mi_delay_slots_t reset_delay;
|
||||||
} mi_thread_data_t;
|
} mi_thread_data_t;
|
||||||
|
|
||||||
// Initialize the thread local default heap, called from `mi_thread_init`
|
// Initialize the thread local default heap, called from `mi_thread_init`
|
||||||
@ -211,6 +212,7 @@ static bool _mi_heap_init(void) {
|
|||||||
}
|
}
|
||||||
mi_tld_t* tld = &td->tld;
|
mi_tld_t* tld = &td->tld;
|
||||||
mi_heap_t* heap = &td->heap;
|
mi_heap_t* heap = &td->heap;
|
||||||
|
mi_delay_slots_t* reset_delay = &td->reset_delay;
|
||||||
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
|
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
|
||||||
heap->thread_id = _mi_thread_id();
|
heap->thread_id = _mi_thread_id();
|
||||||
heap->random = _mi_random_init(heap->thread_id);
|
heap->random = _mi_random_init(heap->thread_id);
|
||||||
@ -221,6 +223,9 @@ static bool _mi_heap_init(void) {
|
|||||||
tld->segments.stats = &tld->stats;
|
tld->segments.stats = &tld->stats;
|
||||||
tld->segments.os = &tld->os;
|
tld->segments.os = &tld->os;
|
||||||
tld->os.stats = &tld->stats;
|
tld->os.stats = &tld->stats;
|
||||||
|
tld->os.reset_delay = reset_delay;
|
||||||
|
memset(reset_delay, 0, sizeof(*reset_delay));
|
||||||
|
reset_delay->capacity = MI_RESET_DELAY_SLOTS;
|
||||||
_mi_heap_default = heap;
|
_mi_heap_default = heap;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
68
src/memory.c
68
src/memory.c
@ -54,7 +54,7 @@ void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, s
|
|||||||
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
|
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
|
||||||
|
|
||||||
// local
|
// local
|
||||||
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size);
|
static bool mi_delay_remove(mi_delay_slots_t* delay_slots, void* p, size_t size);
|
||||||
|
|
||||||
|
|
||||||
// Constants
|
// Constants
|
||||||
@ -208,7 +208,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
|
|||||||
Try to claim blocks in suitable regions
|
Try to claim blocks in suitable regions
|
||||||
-----------------------------------------------------------------------------*/
|
-----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) {
|
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
|
||||||
// initialized at all?
|
// initialized at all?
|
||||||
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
|
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
|
||||||
if (info==0) return false;
|
if (info==0) return false;
|
||||||
@ -229,7 +229,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
static bool mi_region_try_claim(size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||||
{
|
{
|
||||||
// try all regions for a free slot
|
// try all regions for a free slot
|
||||||
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
|
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
|
||||||
@ -238,7 +238,7 @@ static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, me
|
|||||||
for (size_t visited = 0; visited < count; visited++, idx++) {
|
for (size_t visited = 0; visited < count; visited++, idx++) {
|
||||||
if (idx >= count) idx = 0; // wrap around
|
if (idx >= count) idx = 0; // wrap around
|
||||||
mem_region_t* r = &regions[idx];
|
mem_region_t* r = &regions[idx];
|
||||||
if (mi_region_is_suitable(r, numa_node, commit, allow_large)) {
|
if (mi_region_is_suitable(r, numa_node, allow_large)) {
|
||||||
if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) {
|
if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) {
|
||||||
tld->region_idx = idx; // remember the last found position
|
tld->region_idx = idx; // remember the last found position
|
||||||
*region = r;
|
*region = r;
|
||||||
@ -256,7 +256,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
|
|||||||
mem_region_t* region;
|
mem_region_t* region;
|
||||||
mi_bitmap_index_t bit_idx;
|
mi_bitmap_index_t bit_idx;
|
||||||
// first try to claim in existing regions
|
// first try to claim in existing regions
|
||||||
if (!mi_region_try_claim(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
|
if (!mi_region_try_claim(blocks, *is_large, &region, &bit_idx, tld)) {
|
||||||
// otherwise try to allocate a fresh region
|
// otherwise try to allocate a fresh region
|
||||||
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
|
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
|
||||||
// out of regions or memory
|
// out of regions or memory
|
||||||
@ -354,7 +354,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
|
|||||||
if (p==NULL) return;
|
if (p==NULL) return;
|
||||||
if (size==0) return;
|
if (size==0) return;
|
||||||
|
|
||||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
|
mi_delay_remove(tld->reset_delay, p, size);
|
||||||
|
|
||||||
size_t arena_memid = 0;
|
size_t arena_memid = 0;
|
||||||
mi_bitmap_index_t bit_idx;
|
mi_bitmap_index_t bit_idx;
|
||||||
@ -424,7 +424,7 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
|
|||||||
bool is_eager_committed;
|
bool is_eager_committed;
|
||||||
void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed);
|
void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed);
|
||||||
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
|
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
|
||||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE);
|
mi_delay_remove(tld->reset_delay, start, MI_REGION_SIZE);
|
||||||
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
|
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
|
||||||
}
|
}
|
||||||
// and release
|
// and release
|
||||||
@ -440,21 +440,22 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
|
|||||||
|
|
||||||
typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg);
|
typedef void (mi_delay_resolve_fun)(void* addr, size_t size, void* arg);
|
||||||
|
|
||||||
static void mi_delay_insert(mi_delay_slot_t* slots, size_t count,
|
static void mi_delay_insert(mi_delay_slots_t* ds,
|
||||||
mi_msecs_t delay, uint8_t* addr, size_t size,
|
mi_msecs_t delay, uint8_t* addr, size_t size,
|
||||||
mi_delay_resolve_fun* resolve, void* arg)
|
mi_delay_resolve_fun* resolve, void* arg)
|
||||||
{
|
{
|
||||||
if (delay==0) {
|
if (ds == NULL || delay==0 || addr==NULL || size==0) {
|
||||||
resolve(addr, size, arg);
|
resolve(addr, size, arg);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
mi_msecs_t now = _mi_clock_now();
|
mi_msecs_t now = _mi_clock_now();
|
||||||
mi_delay_slot_t* oldest = slots;
|
mi_delay_slot_t* oldest = &ds->slots[0];
|
||||||
// walk through all slots, resolving expired ones.
|
// walk through all slots, resolving expired ones.
|
||||||
// remember the oldest slot to insert the new entry in.
|
// remember the oldest slot to insert the new entry in.
|
||||||
for (size_t i = 0; i < count; i++) {
|
size_t newcount = 0;
|
||||||
mi_delay_slot_t* slot = &slots[i];
|
for (size_t i = 0; i < ds->count; i++) {
|
||||||
|
mi_delay_slot_t* slot = &ds->slots[i];
|
||||||
|
|
||||||
if (slot->expire == 0) {
|
if (slot->expire == 0) {
|
||||||
// empty slot
|
// empty slot
|
||||||
@ -480,26 +481,40 @@ static void mi_delay_insert(mi_delay_slot_t* slots, size_t count,
|
|||||||
}
|
}
|
||||||
else if (oldest->expire > slot->expire) {
|
else if (oldest->expire > slot->expire) {
|
||||||
oldest = slot;
|
oldest = slot;
|
||||||
|
newcount = i+1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
newcount = i+1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
ds->count = newcount;
|
||||||
if (delay>0) {
|
if (delay>0) {
|
||||||
// not yet registered, use the oldest slot
|
// not yet registered, use the oldest slot (or a new one if there is space)
|
||||||
if (oldest->expire > 0) {
|
if (ds->count < ds->capacity) {
|
||||||
|
oldest = &ds->slots[ds->count];
|
||||||
|
ds->count++;
|
||||||
|
}
|
||||||
|
else if (oldest->expire > 0) {
|
||||||
resolve(oldest->addr, oldest->size, arg); // evict if not empty
|
resolve(oldest->addr, oldest->size, arg); // evict if not empty
|
||||||
}
|
}
|
||||||
|
mi_assert_internal((oldest - ds->slots) < (ptrdiff_t)ds->count);
|
||||||
oldest->expire = now + delay;
|
oldest->expire = now + delay;
|
||||||
oldest->addr = addr;
|
oldest->addr = addr;
|
||||||
oldest->size = size;
|
oldest->size = size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_t size)
|
static bool mi_delay_remove(mi_delay_slots_t* ds, void* p, size_t size)
|
||||||
{
|
{
|
||||||
|
if (ds == NULL || p==NULL || size==0) return false;
|
||||||
|
|
||||||
uint8_t* addr = (uint8_t*)p;
|
uint8_t* addr = (uint8_t*)p;
|
||||||
bool done = false;
|
bool done = false;
|
||||||
// walk through all slots
|
size_t newcount = 0;
|
||||||
for (size_t i = 0; i < count; i++) {
|
|
||||||
mi_delay_slot_t* slot = &slots[i];
|
// walk through all valid slots
|
||||||
|
for (size_t i = 0; i < ds->count; i++) {
|
||||||
|
mi_delay_slot_t* slot = &ds->slots[i];
|
||||||
if (slot->addr <= addr && slot->addr + slot->size >= addr + size) {
|
if (slot->addr <= addr && slot->addr + slot->size >= addr + size) {
|
||||||
// earlier slot encompasses the area; remove it
|
// earlier slot encompasses the area; remove it
|
||||||
slot->expire = 0;
|
slot->expire = 0;
|
||||||
@ -511,11 +526,16 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_
|
|||||||
}
|
}
|
||||||
else if ((addr <= slot->addr && addr + size > slot->addr) ||
|
else if ((addr <= slot->addr && addr + size > slot->addr) ||
|
||||||
(addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) {
|
(addr < slot->addr + slot->size && addr + size >= slot->addr + slot->size)) {
|
||||||
// partial overlap, remove slot
|
// partial overlap
|
||||||
mi_assert_internal(false);
|
// can happen with a large object spanning onto some partial end block
|
||||||
|
// mi_assert_internal(false);
|
||||||
slot->expire = 0;
|
slot->expire = 0;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
newcount = i + 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
ds->count = newcount;
|
||||||
return done;
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -525,13 +545,13 @@ static void mi_resolve_reset(void* p, size_t size, void* vtld) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
|
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
|
||||||
mi_delay_insert(tld->reset_delay, MI_RESET_DELAY_SLOTS, mi_option_get(mi_option_reset_delay),
|
mi_delay_insert(tld->reset_delay, mi_option_get(mi_option_reset_delay),
|
||||||
(uint8_t*)p, size, &mi_resolve_reset, tld);
|
(uint8_t*)p, size, &mi_resolve_reset, tld);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||||
if (!mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, (uint8_t*)p, size)) {
|
if (!mi_delay_remove(tld->reset_delay, (uint8_t*)p, size)) {
|
||||||
return _mi_os_unreset(p, size, is_zero, tld->stats);
|
return _mi_os_unreset(p, size, is_zero, tld->stats);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -544,12 +564,12 @@ bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
|||||||
-----------------------------------------------------------------------------*/
|
-----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
|
||||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
|
mi_delay_remove(tld->reset_delay,p, size);
|
||||||
return _mi_os_commit(p, size, is_zero, tld->stats);
|
return _mi_os_commit(p, size, is_zero, tld->stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
|
bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
|
||||||
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);
|
mi_delay_remove(tld->reset_delay, p, size);
|
||||||
return _mi_os_decommit(p, size, tld->stats);
|
return _mi_os_decommit(p, size, tld->stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] =
|
|||||||
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
|
||||||
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
|
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
|
||||||
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
|
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
|
||||||
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset pages on free
|
{ 1, UNINIT, MI_OPTION(page_reset) }, // reset pages on free
|
||||||
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
|
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
|
||||||
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
|
||||||
{ 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
|
{ 500, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
|
||||||
|
@ -504,7 +504,9 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg
|
|||||||
_mi_stat_decrease(&tld->stats->pages, 1);
|
_mi_stat_decrease(&tld->stats->pages, 1);
|
||||||
|
|
||||||
// reset the page memory to reduce memory pressure?
|
// reset the page memory to reduce memory pressure?
|
||||||
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) {
|
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset))
|
||||||
|
// && segment->page_kind <= MI_PAGE_MEDIUM) // to prevent partial overlapping resets
|
||||||
|
{
|
||||||
size_t psize;
|
size_t psize;
|
||||||
uint8_t* start = _mi_page_start(segment, page, &psize);
|
uint8_t* start = _mi_page_start(segment, page, &psize);
|
||||||
page->is_reset = true;
|
page->is_reset = true;
|
||||||
|
Loading…
Reference in New Issue
Block a user