avoid allocation at numa node detection on linux

This commit is contained in:
daan 2019-11-12 10:16:59 -08:00
parent 165ee45845
commit ef179a6377
2 changed files with 56 additions and 46 deletions

View File

@ -56,8 +56,6 @@ void _mi_os_init(void); // called fro
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
size_t _mi_os_good_alloc_size(size_t size); size_t _mi_os_good_alloc_size(size_t size);
int _mi_os_numa_node(mi_os_tld_t* tld);
int _mi_os_numa_node_count(void);
// memory.c // memory.c
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
@ -433,6 +431,25 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c
#endif #endif
} }
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)
// -------------------------------------------------------------------
int _mi_os_numa_node_get(mi_os_tld_t* tld);
int _mi_os_numa_node_count_get(void);
extern int _mi_numa_node_count;
// Inline front-end for the numa node lookup.
// When the cached `_mi_numa_node_count` is exactly 1 the answer is always
// node 0 and no call is made. In every other case (including a count that
// has not been determined yet) the lookup is delegated to
// `_mi_os_numa_node_get`.
static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
  return (mi_likely(_mi_numa_node_count == 1) ? 0 : _mi_os_numa_node_get(tld));
}
// Inline front-end for the numa node count.
// Returns the cached `_mi_numa_node_count` when it is already positive;
// otherwise defers to `_mi_os_numa_node_count_get` to obtain the count.
static inline int _mi_os_numa_node_count(void) {
  const int cached = _mi_numa_node_count;
  if (mi_likely(cached > 0)) {
    return cached;
  }
  return _mi_os_numa_node_count_get();
}
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// Getting the thread id should be performant // Getting the thread id should be performant
// as it is called in the fast path of `_mi_free`, // as it is called in the fast path of `_mi_free`,

View File

@ -975,9 +975,8 @@ static int mi_os_numa_node_countx(void) {
return (int)(numa_max + 1); return (int)(numa_max + 1);
} }
#elif defined(__linux__) #elif defined(__linux__)
#include <dirent.h> #include <sys/syscall.h> // getcpu
#include <stdlib.h> #include <stdio.h> // snprintf; access() comes from <unistd.h>
#include <sys/syscall.h>
static int mi_os_numa_nodex(void) { static int mi_os_numa_nodex(void) {
#ifdef SYS_getcpu #ifdef SYS_getcpu
@ -990,22 +989,15 @@ static int mi_os_numa_nodex(void) {
return 0; return 0;
#endif #endif
} }
static int mi_os_numa_node_countx(void) { static int mi_os_numa_node_countx(void) {
DIR* d = opendir("/sys/devices/system/node"); char buf[128];
if (d==NULL) return 1; int max_node = mi_option_get(mi_option_max_numa_node);
int node = 0;
struct dirent* de; for(node = 0; node < max_node; node++) {
int max_node_num = 0; snprintf(buf, 127, "/sys/devices/system/node/node%i", node + 1);
while ((de = readdir(d)) != NULL) { if (access(buf,R_OK) != 0) break;
int node_num;
if (strncmp(de->d_name, "node", 4) == 0) {
node_num = (int)strtol(de->d_name+4, NULL, 0);
if (max_node_num < node_num) max_node_num = node_num;
}
} }
closedir(d); return (node+1);
return (max_node_num + 1);
} }
#else #else
static int mi_os_numa_nodex(void) { static int mi_os_numa_nodex(void) {
@ -1016,23 +1008,24 @@ static int mi_os_numa_node_countx(void) {
} }
#endif #endif
int _mi_os_numa_node_count(void) { int _mi_numa_node_count = 0; // cache the node count
static int numa_node_count = 0; // cache the node count
if (mi_unlikely(numa_node_count <= 0)) { int _mi_os_numa_node_count_get(void) {
if (mi_unlikely(_mi_numa_node_count <= 0)) {
int ncount = mi_os_numa_node_countx(); int ncount = mi_os_numa_node_countx();
int ncount0 = ncount; int ncount0 = ncount;
// never more than max numa node and at least 1 // never more than max numa node and at least 1
int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node); int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node);
if (ncount > nmax) ncount = nmax; if (ncount > nmax) ncount = nmax;
if (ncount <= 0) ncount = 1; if (ncount <= 0) ncount = 1;
numa_node_count = ncount; _mi_numa_node_count = ncount;
_mi_verbose_message("using %i numa regions (%i nodes detected)\n", numa_node_count, ncount0); _mi_verbose_message("using %i numa regions (%i nodes detected)\n", _mi_numa_node_count, ncount0);
} }
mi_assert_internal(numa_node_count >= 1); mi_assert_internal(_mi_numa_node_count >= 1);
return numa_node_count; return _mi_numa_node_count;
} }
int _mi_os_numa_node(mi_os_tld_t* tld) { int _mi_os_numa_node_get(mi_os_tld_t* tld) {
UNUSED(tld); UNUSED(tld);
int numa_count = _mi_os_numa_node_count(); int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0