change numa support on linux to use getcpu

This commit is contained in:
daan 2019-11-02 11:55:03 -07:00
parent 70748ee1ee
commit fd9d8c85ae
3 changed files with 14 additions and 37 deletions

View File

@ -413,7 +413,6 @@ typedef struct mi_segments_tld_s {
// OS thread local data: per-thread state passed to OS-level routines
// (for example `_mi_os_numa_node` takes a pointer to it).
// The main thread's instance is filled in positionally (the `// os` entry of
// `tld_main`), so the member order here must stay in sync with that initializer.
typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation
int numa_node; // numa node associated with this thread
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;

View File

@ -99,8 +99,8 @@ static mi_tld_t tld_main = {
0, false,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
{ 0, -1, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
{ 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {
@ -218,7 +218,6 @@ static bool _mi_heap_init(void) {
memset(tld, 0, sizeof(*tld));
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->os.numa_node = -1;
tld->os.stats = &tld->stats;
_mi_heap_default = heap;
}

View File

@ -903,29 +903,21 @@ static int mi_os_numa_node_countx(void) {
GetNumaHighestNodeNumber(&numa_max);
return (int)(numa_max + 1);
}
#elif MI_HAS_NUMA
#elif defined(__linux__)
#include <dirent.h>
#include <stdlib.h>
#include <numaif.h>
#include <sys/syscall.h>
// Return the NUMA node reported by the kernel for the calling thread, or 0
// when the node cannot be determined.
//
// The node is queried with the raw `getcpu` system call when the system
// headers define `SYS_getcpu`; otherwise the function reports node 0.
// `MI_NUMA_NODE_SLOW` is deliberately not defined here: that macro belonged
// to the earlier `get_mempolicy`-based implementation.
static int mi_os_numa_nodex(void) {
#ifdef SYS_getcpu
  unsigned node = 0;
  unsigned ncpu = 0;
  // `syscall` returns `long`; keep the full width so a failure value is not
  // truncated. The third `getcpu` argument is unused and passed as NULL.
  long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
  if (err != 0) return 0;
  return (int)node;
#else
  return 0;  // no `getcpu` available: behave as a single-node system
#endif
}
static int mi_os_numa_node_countx(void) {
@ -968,21 +960,8 @@ int _mi_os_numa_node_count(void) {
}
int _mi_os_numa_node(mi_os_tld_t* tld) {
int numa_node;
#ifndef MI_NUMA_NODE_SLOW
UNUSED(tld);
numa_node = mi_os_numa_nodex();
#else
if (mi_unlikely(tld->numa_node < 0)) {
// Cache the NUMA node of the thread if the call is slow.
// This may not be correct as threads can migrate to another cpu on
// another node -- however, for memory allocation this just means we keep
// using the same 'node id' for its allocations; new OS allocations
// naturally come from the actual node so in practice this may be fine.
tld->numa_node = mi_os_numa_nodex();
}
numa_node = tld->numa_node;
#endif
int numa_node = mi_os_numa_nodex();
// never more than the node count and >= 0
int numa_count = _mi_os_numa_node_count();
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }