From fd9d8c85ae40db95feb51da6e5478850bc6722fc Mon Sep 17 00:00:00 2001
From: daan
Date: Sat, 2 Nov 2019 11:55:03 -0700
Subject: [PATCH] change numa support on linux to use getcpu

---
 include/mimalloc-types.h |  1 -
 src/init.c               |  5 ++---
 src/os.c                 | 45 +++++++++++----------------------------------
 3 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 0208d5c7..99b6b22b 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -413,7 +413,6 @@ typedef struct mi_segments_tld_s {
 // OS thread local data
 typedef struct mi_os_tld_s {
   size_t region_idx;   // start point for next allocation
-  int    numa_node;    // numa node associated with this thread
   mi_stats_t* stats;   // points to tld stats
 } mi_os_tld_t;
 
diff --git a/src/init.c b/src/init.c
index 166ca451..ed15aeba 100644
--- a/src/init.c
+++ b/src/init.c
@@ -99,8 +99,8 @@ static mi_tld_t tld_main = {
   0, false,
   &_mi_heap_main,
   { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
-  { 0, -1, tld_main_stats },       // os
-  { MI_STATS_NULL }                // stats
+  { 0, tld_main_stats },           // os
+  { MI_STATS_NULL }                // stats
 };
 
 mi_heap_t _mi_heap_main = {
@@ -218,7 +218,6 @@ static bool _mi_heap_init(void) {
     memset(tld, 0, sizeof(*tld));
     tld->heap_backing = heap;
     tld->segments.stats = &tld->stats;
-    tld->os.numa_node = -1;
     tld->os.stats = &tld->stats;
     _mi_heap_default = heap;
   }
diff --git a/src/os.c b/src/os.c
index c41d028f..8e1b3e91 100644
--- a/src/os.c
+++ b/src/os.c
@@ -903,29 +903,21 @@ static int mi_os_numa_node_countx(void) {
   GetNumaHighestNodeNumber(&numa_max);
   return (int)(numa_max + 1);
 }
-#elif MI_HAS_NUMA
+#elif defined(__linux__)
 #include <sys/syscall.h>
 #include <unistd.h>
-#include <numaif.h>
+#include <stdio.h>
+
 static int mi_os_numa_nodex(void) {
-  #define MI_NUMA_NODE_SLOW // too slow, so cache it
-  // TODO: perhaps use RDTSCP instruction on x64?
-  // see
-  #define MI_MAX_MASK (4)   // support at most 256 nodes
-  unsigned long mask[MI_MAX_MASK];
-  memset(mask,0,MI_MAX_MASK*sizeof(long));
-  int mode = 0;
-  long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */);
+#ifdef SYS_getcpu
+  unsigned node = 0;
+  unsigned ncpu = 0;
+  int err = syscall(SYS_getcpu, &ncpu, &node, NULL);
   if (err != 0) return 0;
-  // find the lowest bit that is set
-  for(int i = 0; i < MI_MAX_MASK; i++) {
-    for(int j = 0; j < (int)(sizeof(long)*8); j++) {
-      if ((mask[i] & (1UL << j)) != 0) {
-        return (i*sizeof(long)*8 + j);
-      }
-    }
-  }
-  return 0;
+  return (int)node;
+#else
+  return 0;
+#endif
 }
 
 static int mi_os_numa_node_countx(void) {
@@ -968,21 +960,8 @@ int _mi_os_numa_node_count(void) {
 }
 
 int _mi_os_numa_node(mi_os_tld_t* tld) {
-  int numa_node;
-#ifndef MI_NUMA_NODE_SLOW
   UNUSED(tld);
-  numa_node = mi_os_numa_nodex();
-#else
-  if (mi_unlikely(tld->numa_node < 0)) {
-    // Cache the NUMA node of the thread if the call is slow.
-    // This may not be correct as threads can migrate to another cpu on
-    // another node -- however, for memory allocation this just means we keep
-    // using the same 'node id' for its allocations; new OS allocations
-    // naturally come from the actual node so in practice this may be fine.
-    tld->numa_node = mi_os_numa_nodex();
-  }
-  numa_node = tld->numa_node;
-#endif
+  int numa_node = mi_os_numa_nodex();
   // never more than the node count and >= 0
   int numa_count = _mi_os_numa_node_count();
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
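
For reference, the getcpu-based lookup that the patch switches to can be exercised on its own.
The sketch below is illustrative only and not part of the commit (the helper name
current_numa_node is invented here); it assumes a Linux system where SYS_getcpu is defined
and, like the new mi_os_numa_nodex, falls back to node 0 otherwise:

/* sketch: query the NUMA node the calling thread currently runs on via SYS_getcpu */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

static int current_numa_node(void) {
#ifdef SYS_getcpu
  unsigned cpu  = 0;
  unsigned node = 0;
  /* the third argument (the old getcpu cache) is ignored by current kernels */
  long err = syscall(SYS_getcpu, &cpu, &node, NULL);
  if (err != 0) return 0;   /* on failure, pretend we are on node 0 */
  return (int)node;
#else
  return 0;                 /* no SYS_getcpu: behave as a single-node system */
#endif
}

int main(void) {
  printf("current NUMA node: %d\n", current_numa_node());
  return 0;
}

Because a thread may migrate to another cpu between the syscall and the allocation, the
result is only a hint; the unchanged clamp in _mi_os_numa_node (numa_node % numa_count)
keeps it within the detected node count either way.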