improve allocation of the huge OS page arena
parent d36d04b4a6
commit aaf01620f4
src/arena.c (39 changed lines)
@@ -6,7 +6,16 @@ terms of the MIT license. A copy of the license can be found in the file
 -----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+"Arenas" are fixed areas of OS memory from which we can allocate
+large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to
+allocate in one arena consisting of huge OS pages -- otherwise it
+delegates to direct allocation from the OS.
+
+In the future, we can expose an API to manually add more arenas, which
+is sometimes needed for embedded devices or shared memory, for example.
+
+The arena allocation needs to be thread-safe and we use a lock-free scan
+with on-demand coalescing.
+-----------------------------------------------------------------------------*/
 #include "mimalloc.h"
 #include "mimalloc-internal.h"
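For orientation: the arena this new header comment describes is populated through the public mi_reserve_huge_os_pages call whose reworked implementation appears in the hunks below. A minimal caller sketch (the page count and timeout are arbitrary example values, not anything this commit prescribes):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  size_t reserved = 0;
  // Try to reserve 4 huge (1GiB) OS pages, giving up after ~2 seconds.
  int err = mi_reserve_huge_os_pages(4, 2.0, &reserved);
  // After this commit, err != 0 with reserved > 0 means partial success:
  // the arena is still created, just with fewer pages than requested.
  printf("reserved %zu huge page(s), err=%d\n", reserved, err);
  return 0;
}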
@@ -16,8 +25,8 @@ terms of the MIT license. A copy of the license can be found in the file
 
 // os.c
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
-void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
-int   _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
+int   _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept;
+void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
 
 /* -----------------------------------------------------------
   Arena allocation
@@ -338,25 +347,27 @@ static bool mi_arena_add(mi_arena_t* arena) {
 
 /* -----------------------------------------------------------
   Reserve a huge page arena.
-  TODO: improve OS api to just reserve and claim a huge
-  page area at once, (and return the total size).
 ----------------------------------------------------------- */
-#include <errno.h>
+#include <errno.h> // ENOMEM
 
 int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
   size_t pages_reserved_default = 0;
   if (pages_reserved==NULL) pages_reserved = &pages_reserved_default;
-  int err = _mi_os_reserve_huge_os_pages(pages, max_secs, pages_reserved);
-  if (*pages_reserved==0) return err;
-  size_t hsize = (*pages_reserved) * GiB;
-  void* p = _mi_os_try_alloc_from_huge_reserved(hsize, MI_SEGMENT_ALIGN);
-  mi_assert_internal(p != NULL);
-  if (p == NULL) return ENOMEM;
+  size_t hsize = 0;
+  void* p = NULL;
+  int err = _mi_os_alloc_huge_os_pages(pages, max_secs, &p, pages_reserved, &hsize);
+  _mi_verbose_message("reserved %zu huge pages\n", *pages_reserved);
+  if (p==NULL) return err;
+  // err might be != 0 but that is fine, we just got fewer pages.
+  mi_assert_internal(*pages_reserved > 0 && hsize > 0 && *pages_reserved <= pages);
   size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
   size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too many
-  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_heap_default->tld->stats);
-  if (arena == NULL) return ENOMEM;
+  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main);
+  if (arena == NULL) {
+    *pages_reserved = 0;
+    _mi_os_free(p, hsize, &_mi_stats_main);
+    return ENOMEM;
+  }
   arena->block_count = bcount;
   arena->start = (uint8_t*)p;
   arena->block_bottom = 0;
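A worked example of the sizing arithmetic above (illustrative numbers; the 16MiB figure for MI_ARENA_BLOCK_SIZE comes from the header comment): if 4 huge pages are obtained, hsize = 4GiB, so bcount = 4GiB / 16MiB = 256 blocks and asize = sizeof(mi_arena_t) + 256*sizeof(mi_block_info_t). Note the new error path as well: if that metadata allocation fails, the huge page area p is now released via _mi_os_free instead of being stranded as in the old code.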
src/os.c (110 changed lines)
@@ -36,8 +36,6 @@ terms of the MIT license. A copy of the license can be found in the file
   large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
 ----------------------------------------------------------- */
 bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
-bool _mi_os_is_huge_reserved(void* p);
-void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
 
 static void* mi_align_up_ptr(void* p, size_t alignment) {
   return (void*)_mi_align_up((uintptr_t)p, alignment);
@@ -184,7 +182,7 @@ void _mi_os_init() {
 
 static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats)
 {
-  if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true;
+  if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr)
   bool err = false;
 #if defined(_WIN32)
   err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
@@ -628,7 +626,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
   *is_zero = false;
   size_t csize;
   void* start = mi_os_page_align_areax(conservative, addr, size, &csize);
-  if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true;
+  if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr))
   int err = 0;
   if (commit) {
     _mi_stat_increase(&stats->committed, csize);
@@ -684,7 +682,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   // page align conservatively within the range
   size_t csize;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
-  if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true;
+  if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)
   if (reset) _mi_stat_increase(&stats->reset, csize);
   else _mi_stat_decrease(&stats->reset, csize);
   if (!reset) return true; // nothing to do on unreset!
@@ -758,9 +756,11 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   size_t csize = 0;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
   if (csize == 0) return false;
+  /*
   if (_mi_os_is_huge_reserved(addr)) {
     _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
   }
+  */
   int err = 0;
 #ifdef _WIN32
   DWORD oldprotect = 0;
@@ -816,79 +816,42 @@ will be reused.
 -----------------------------------------------------------------------------*/
 #define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB
 
-typedef struct mi_huge_info_s {
-  volatile _Atomic(void*)  start;    // start of huge page area (32TiB)
-  volatile _Atomic(size_t) reserved; // total reserved size
-  volatile _Atomic(size_t) used;     // currently allocated
-} mi_huge_info_t;
-
-static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) };
-
-bool _mi_os_is_huge_reserved(void* p) {
-  return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL &&
-          p >= mi_atomic_read_ptr(&os_huge_reserved.start) &&
-          (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved));
-}
-
-void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment)
-{
-  // only allow large aligned allocations (e.g. regions)
-  if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL;
-  if (try_alignment > MI_SEGMENT_SIZE) return NULL;
-  if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL;
-  if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full
-
-  // always aligned
-  mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0);
-  mi_assert_internal((uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0);
-
-  // try to reserve space
-  size_t base = mi_atomic_addu(&os_huge_reserved.used, size);
-  if ((base + size) > os_huge_reserved.reserved) {
-    // "free" our over-allocation
-    mi_atomic_subu(&os_huge_reserved.used, size);
-    return NULL;
-  }
-
-  // success!
-  uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base;
-  mi_assert_internal((uintptr_t)p % MI_SEGMENT_SIZE == 0);
-  return p;
-}
-
-/*
-static void mi_os_free_huge_reserved() {
-  uint8_t* addr = os_huge_reserved.start;
-  size_t total = os_huge_reserved.reserved;
-  os_huge_reserved.reserved = 0;
-  os_huge_reserved.start = NULL;
-  for (size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) {
-    _mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
-  }
-}
-*/
-
 #if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
-int _mi_os_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** start, size_t* pages_reserved, size_t* size) mi_attr_noexcept {
   UNUSED(pages); UNUSED(max_secs);
+  if (start != NULL) *start = NULL;
   if (pages_reserved != NULL) *pages_reserved = 0;
+  if (size != NULL) *size = 0;
   return ENOMEM;
 }
 #else
-int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
+static _Atomic(uintptr_t) huge_top; // = 0
+
+int _mi_os_alloc_huge_os_pages(size_t pages, double max_secs, void** pstart, size_t* pages_reserved, size_t* psize) mi_attr_noexcept
 {
-  if (pages_reserved != NULL) *pages_reserved = 0;
+  *pstart = NULL;
+  *pages_reserved = 0;
+  *psize = 0;
   if (max_secs==0) return ETIMEDOUT; // timeout
   if (pages==0) return 0; // ok
-  if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved
-
-  // Set the start address after the 32TiB area
-  uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address
-  #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
-  uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages);
-  start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
-  #endif
+
+  // Atomically claim a huge address range
+  size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
+  uint8_t* start;
+  do {
+    start = (uint8_t*)mi_atomic_addu(&huge_top, size);
+    if (start == NULL) {
+      uintptr_t top = ((uintptr_t)32 << 40); // 32TiB virtual start address
+      #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
+      uintptr_t r = _mi_random_init((uintptr_t)&_mi_os_alloc_huge_os_pages);
+      top += ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
+      #endif
+      mi_atomic_cas_strong(&huge_top, top, 0);
+    }
+  } while (start == NULL);
 
   // Allocate one page at a time, but try to place them contiguously
   // (one page at a time so we can abort if it takes too long)
   double start_t = _mi_clock_start();
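The do/while loop above is the heart of the new lock-free scheme: a single shared huge_top cursor is bumped with an atomic add to hand out disjoint address ranges, and the first caller publishes the 32TiB (optionally randomized) base via CAS. Below is a self-contained C11 sketch of the same bump-claim idea, simplified so the one-time initialization happens before the bump; the names and structure are illustrative, not mimalloc's internals:

#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

#define HUGE_PAGE_SIZE ((size_t)1 << 30)  // 1GiB, mirroring MI_HUGE_OS_PAGE_SIZE

static _Atomic uintptr_t claim_top;       // zero until the first caller initializes it

// Hand out disjoint virtual address ranges, lock-free.
static uint8_t* claim_huge_range(size_t pages) {
  size_t size = pages * HUGE_PAGE_SIZE;
  // One-time lazy init: exactly one CAS succeeds in installing the 32TiB
  // base address; every later caller sees a nonzero top and skips this.
  uintptr_t expected = 0;
  atomic_compare_exchange_strong(&claim_top, &expected, (uintptr_t)32 << 40);
  // The atomic add returns the previous top, so concurrent callers each
  // receive their own non-overlapping [start, start+size) range.
  uintptr_t start = atomic_fetch_add(&claim_top, size);
  return (uint8_t*)start;
}

The claimed range is only an address hint at this point: the code that follows in this hunk then asks the OS for one 1GiB page at a time at those addresses, so a slow reservation can be aborted on timeout. In the secure/release build the base is offset by ((r>>17) & 0x3FF) GiB, i.e. somewhere in the first 1TiB above 32TiB.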
@@ -925,16 +888,13 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
     }
     // success, record it
     if (page==0) {
-      mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes
-      mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE);
+      *pstart = addr;
     }
-    else {
-      mi_atomic_addu(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE);
-    }
+    *psize += MI_HUGE_OS_PAGE_SIZE;
+    *pages_reserved += 1;
     _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
     _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
-    if (pages_reserved != NULL) { *pages_reserved = page + 1; }
 
     // check for timeout
     double elapsed = _mi_clock_end(start_t);
     if (elapsed > max_secs) return ETIMEDOUT;
@@ -943,7 +903,7 @@ int _mi_os_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
       if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout
     }
   }
-  _mi_verbose_message("reserved %zu huge pages\n", pages);
+  mi_assert_internal(*psize == size);
   return 0;
 }
 #endif