diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 6bfabe27..668a7bd3 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -17,18 +17,18 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #if (MI_DEBUG>0)
 #define mi_trace_message(...)  _mi_trace_message(__VA_ARGS__)
 #else
-#define mi_trace_message(...) 
+#define mi_trace_message(...)
 #endif
 
 #if defined(_MSC_VER)
 #define mi_decl_noinline   __declspec(noinline)
-#define mi_attr_noreturn 
+#define mi_attr_noreturn
 #elif defined(__GNUC__) || defined(__clang__)
 #define mi_decl_noinline   __attribute__((noinline))
 #define mi_attr_noreturn   __attribute__((noreturn))
 #else
 #define mi_decl_noinline
-#define mi_attr_noreturn 
+#define mi_attr_noreturn
 #endif
 
@@ -56,8 +56,6 @@ void _mi_os_init(void);                 // called fro
 void* _mi_os_alloc(size_t size, mi_stats_t* stats);           // to allocate thread local data
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);   // to free thread local data
 size_t _mi_os_good_alloc_size(size_t size);
-int   _mi_os_numa_node(mi_os_tld_t* tld);
-int   _mi_os_numa_node_count(void);
 
 // memory.c
 void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
@@ -146,8 +144,8 @@ bool _mi_page_is_valid(mi_page_t* page);
 Inlined definitions
 ----------------------------------------------------------- */
 #define UNUSED(x)     (void)(x)
-#if (MI_DEBUG>0) 
-#define UNUSED_RELEASE(x)  
+#if (MI_DEBUG>0)
+#define UNUSED_RELEASE(x)
 #else
 #define UNUSED_RELEASE(x)  UNUSED(x)
 #endif
@@ -398,7 +396,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* bl
 #endif
 }
 
-static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) { 
+static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) {
 #ifdef MI_ENCODE_FREELIST
   block->next = (mi_encoded_t)next ^ cookie;
 #else
@@ -411,12 +409,12 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t*
 #ifdef MI_ENCODE_FREELIST
   mi_block_t* next = mi_block_nextx(page->cookie,block);
   // check for free list corruption: is `next` at least in our segment range?
-  // TODO: it is better to check if it is actually inside our page but that is more expensive 
+  // TODO: it is better to check if it is actually inside our page but that is more expensive
   // to calculate. Perhaps with a relative free list this becomes feasible?
   if (next!=NULL && !mi_is_in_same_segment(block, next)) {
     _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
     next = NULL;
-  } 
+  }
   return next;
 #else
   UNUSED(page);
@@ -433,6 +431,25 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
 #endif
 }
 
+
+// -------------------------------------------------------------------
+// Optimize numa node access for the common case (= one node)
+// -------------------------------------------------------------------
+
+int    _mi_os_numa_node_get(mi_os_tld_t* tld);
+int    _mi_os_numa_node_count_get(void);
+
+extern int _mi_numa_node_count;
+static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
+  if (mi_likely(_mi_numa_node_count == 1)) return 0;
+  else return _mi_os_numa_node_get(tld);
+}
+static inline int _mi_os_numa_node_count(void) {
+  if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
+  else return _mi_os_numa_node_count_get();
+}
+
+
 // -------------------------------------------------------------------
 // Getting the thread id should be performant
 // as it is called in the fast path of `_mi_free`,
diff --git a/src/os.c b/src/os.c
index 5229381b..d6878927 100644
--- a/src/os.c
+++ b/src/os.c
@@ -786,9 +786,9 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
   const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
 
   mi_win_enable_large_os_pages();
-  
+
   #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
-  MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} };  
+  MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} };
   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
   static bool mi_huge_pages_available = true;
   if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
@@ -818,7 +818,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
   // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
   if (pVirtualAlloc2 != NULL && numa_node >= 0) {
     params[0].Type = MemExtendedParameterNumaNode;
-    params[0].ULong = (unsigned)numa_node; 
+    params[0].ULong = (unsigned)numa_node;
     return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
   }
   #endif
@@ -838,7 +838,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
   #ifdef MI_HAS_NUMA
   if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
     uintptr_t numa_mask = (1UL << numa_node);
-    // TODO: does `mbind` work correctly for huge OS pages? should we 
+    // TODO: does `mbind` work correctly for huge OS pages? should we
     // use `set_mempolicy` before calling mmap instead?
     // see:
     long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
@@ -857,7 +857,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 }
 #endif
 
-#if (MI_INTPTR_SIZE >= 8) 
+#if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
 static _Atomic(uintptr_t)  mi_huge_start; // = 0
 
@@ -900,7 +900,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
   size_t size = 0;
   uint8_t* start = mi_os_claim_huge_pages(pages, &size);
   if (start == NULL) return NULL; // or 32-bit systems
-  
+
   // Allocate one page at the time but try to place them contiguously
   // We allocate one page at the time to be able to abort if it takes too long
   // or to at least allocate as many as available on the system.
@@ -920,11 +920,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
       }
       break;
     }
-    
+
     // success, record it
     _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
     _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
-    
+
     // check for timeout
     if (max_msecs > 0) {
       mi_msecs_t elapsed = _mi_clock_end(start_t);
@@ -958,7 +958,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
 }
 
 /* ----------------------------------------------------------------------------
-Support NUMA aware allocation 
+Support NUMA aware allocation
 -----------------------------------------------------------------------------*/
 #ifdef WIN32
 static int mi_os_numa_nodex() {
@@ -975,9 +975,8 @@ static int mi_os_numa_node_countx(void) {
   return (int)(numa_max + 1);
 }
 #elif defined(__linux__)
-#include <sys/syscall.h>
-#include <dirent.h>
-#include <stdlib.h>
+#include <sys/syscall.h>  // getcpu
+#include <unistd.h>       // access
 
 static int mi_os_numa_nodex(void) {
 #ifdef SYS_getcpu
@@ -990,22 +989,15 @@ static int mi_os_numa_nodex(void) {
   return 0;
 #endif
 }
-
 static int mi_os_numa_node_countx(void) {
-  DIR* d = opendir("/sys/devices/system/node");
-  if (d==NULL) return 1;
-
-  struct dirent* de;
-  int max_node_num = 0;
-  while ((de = readdir(d)) != NULL) {
-    int node_num;
-    if (strncmp(de->d_name, "node", 4) == 0) {
-      node_num = (int)strtol(de->d_name+4, NULL, 0);
-      if (max_node_num < node_num) max_node_num = node_num;
-    }
+  char buf[128];
+  int max_node = mi_option_get(mi_option_max_numa_node);
+  int node = 0;
+  for(node = 0; node < max_node; node++) {
+    snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1);
+    if (access(buf,R_OK) != 0) break;
   }
-  closedir(d);
-  return (max_node_num + 1);
+  return (node+1);
 }
 #else
 static int mi_os_numa_nodex(void) {
@@ -1016,29 +1008,30 @@ static int mi_os_numa_node_countx(void) {
 }
 #endif
 
-int _mi_os_numa_node_count(void) {
-  static int numa_node_count = 0; // cache the node count
-  if (mi_unlikely(numa_node_count <= 0)) {
-    int ncount = mi_os_numa_node_countx();
+int _mi_numa_node_count = 0;   // cache the node count
+
+int _mi_os_numa_node_count_get(void) {
+  if (mi_unlikely(_mi_numa_node_count <= 0)) {
+    int ncount = mi_os_numa_node_countx();
     int ncount0 = ncount;
     // never more than max numa node and at least 1
    int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node);
     if (ncount > nmax) ncount = nmax;
     if (ncount <= 0) ncount = 1;
-    numa_node_count = ncount;
-    _mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0);
+    _mi_numa_node_count = ncount;
+    _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0);
   }
-  mi_assert_internal(numa_node_count >= 1);
-  return numa_node_count;
+  mi_assert_internal(_mi_numa_node_count >= 1);
+  return _mi_numa_node_count;
 }
 
-int _mi_os_numa_node(mi_os_tld_t* tld) {
+int _mi_os_numa_node_get(mi_os_tld_t* tld) {
   UNUSED(tld);
   int numa_count = _mi_os_numa_node_count();
   if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
   // never more than the node count and >= 0
   int numa_node = mi_os_numa_nodex();
   if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
-  if (numa_node < 0) numa_node = 0; 
+  if (numa_node < 0) numa_node = 0;
   return numa_node;
 }
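A note on the `mi_block_next` hunk in `mimalloc-internal.h`: under `MI_ENCODE_FREELIST`, free-list `next` pointers are stored XOR'ed with a cookie, and decoding verifies the result still lies in the block's segment so a corrupted free list is caught early. A minimal sketch of that XOR round-trip; the `block_t` type and the fixed cookie constant are simplified stand-ins, not mimalloc's own definitions:

```c
#include <stdint.h>
#include <stdio.h>

typedef struct block_s { uintptr_t next; } block_t;

// Store `next` XOR'ed with a secret cookie: a stray write that overwrites
// the field decodes to a wild pointer, which a range check such as
// mi_is_in_same_segment can then reject.
static inline void block_set_next(uintptr_t cookie, block_t* b, const block_t* next) {
  b->next = (uintptr_t)next ^ cookie;
}

static inline block_t* block_next(uintptr_t cookie, const block_t* b) {
  return (block_t*)(b->next ^ cookie);
}

int main(void) {
  uintptr_t cookie = 0x5bd1e995;  // illustrative constant; mimalloc derives its cookies at runtime
  block_t a, b;
  block_set_next(cookie, &a, &b);
  printf("round-trip ok: %d\n", block_next(cookie, &a) == &b);
  return 0;
}
```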
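The core of the change is splitting `_mi_os_numa_node` and `_mi_os_numa_node_count` into header-inlined fast paths over the cached global `_mi_numa_node_count`, with the `*_get` functions in `os.c` as the slow path. In the common case (a single NUMA node) the lookup becomes one load and a well-predicted branch instead of a cross-module call. A self-contained sketch of the same pattern, assuming GCC/Clang for `__builtin_expect`; all names here are illustrative:

```c
#include <stdio.h>

#define likely(x)  __builtin_expect(!!(x), 1)

static int numa_node_count = 0;            // cached; 0 means "not detected yet"

static int numa_node_count_slow(void) {    // stand-in for mi_os_numa_node_countx()
  numa_node_count = 1;                     // pretend detection found a single node
  return numa_node_count;
}

static int numa_node_slow(void) {          // stand-in for the getcpu()-based lookup
  return 0;
}

// Inlined fast paths: one load and a predictable branch in the common case.
static inline int numa_node_count_get(void) {
  if (likely(numa_node_count > 0)) return numa_node_count;
  return numa_node_count_slow();           // first call only
}

static inline int numa_node_get(void) {
  if (likely(numa_node_count == 1)) return 0;  // single node: skip the syscall entirely
  return numa_node_slow();
}

int main(void) {
  printf("nodes: %d, current: %d\n", numa_node_count_get(), numa_node_get());
  return 0;
}
```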
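On the Linux side, node counting now probes `/sys/devices/system/node/node<N>` with `access()` instead of walking the directory with `opendir`/`readdir`; it assumes `node0` exists and counts consecutively numbered nodes starting at `node1`, bounded by `mi_option_max_numa_node`. A standalone sketch of that probe, with a fixed bound standing in for the option lookup:

```c
#include <stdio.h>    // snprintf
#include <unistd.h>   // access

// Count NUMA nodes by probing the sysfs entries node1..node<max_node>.
// node0 is assumed to exist; counting stops at the first gap.
static int numa_node_count(void) {
  const int max_node = 256;   // stand-in for mi_option_get(mi_option_max_numa_node)
  char buf[128];
  int node;
  for (node = 0; node < max_node; node++) {
    snprintf(buf, sizeof(buf), "/sys/devices/system/node/node%i", node + 1);
    if (access(buf, R_OK) != 0) break;   // node(N+1) absent: stop counting
  }
  return node + 1;   // +1 accounts for node0
}

int main(void) {
  printf("numa nodes: %d\n", numa_node_count());
  return 0;
}
```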