optimized numa calls; better Linux support
This commit is contained in:
parent
57dd69265a
commit
2c12d7f223
@ -1,6 +1,8 @@
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
project(libmimalloc C CXX)
|
||||
include("cmake/mimalloc-config-version.cmake")
|
||||
include("CheckIncludeFile")
|
||||
|
||||
set(CMAKE_C_STANDARD 11)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
@ -88,6 +90,16 @@ if(MI_USE_CXX MATCHES "ON")
|
||||
set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX )
|
||||
endif()
|
||||
|
||||
CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H)
|
||||
if(MI_HAVE_NUMA_H)
|
||||
list(APPEND mi_defines MI_HAS_NUMA)
|
||||
list(APPEND mi_libraries numa)
|
||||
else()
|
||||
if (NOT(WIN32))
|
||||
message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Compiler flags
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
|
||||
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
|
||||
|
@ -56,7 +56,7 @@ void _mi_os_init(void); // called fro
|
||||
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
|
||||
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
|
||||
size_t _mi_os_good_alloc_size(size_t size);
|
||||
int _mi_os_numa_node(void);
|
||||
int _mi_os_numa_node(mi_os_tld_t* tld);
|
||||
|
||||
|
||||
// memory.c
|
||||
|
@ -413,6 +413,7 @@ typedef struct mi_segments_tld_s {
|
||||
// OS thread local data
|
||||
typedef struct mi_os_tld_s {
|
||||
size_t region_idx; // start point for next allocation
|
||||
int numa_node; // numa node associated with this thread
|
||||
mi_stats_t* stats; // points to tld stats
|
||||
} mi_os_tld_t;
|
||||
|
||||
|
@ -267,7 +267,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
|
||||
{
|
||||
size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
|
||||
size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
|
||||
int numa_node = _mi_os_numa_node(); // current numa node
|
||||
int numa_node = _mi_os_numa_node(tld); // current numa node
|
||||
|
||||
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
|
||||
// try numa affine allocation
|
||||
|
@ -99,7 +99,7 @@ static mi_tld_t tld_main = {
|
||||
0, false,
|
||||
&_mi_heap_main,
|
||||
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
|
||||
{ 0, tld_main_stats }, // os
|
||||
{ 0, -1, tld_main_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
};
|
||||
|
||||
@ -218,6 +218,7 @@ static bool _mi_heap_init(void) {
|
||||
memset(tld, 0, sizeof(*tld));
|
||||
tld->heap_backing = heap;
|
||||
tld->segments.stats = &tld->stats;
|
||||
tld->os.numa_node = -1;
|
||||
tld->os.stats = &tld->stats;
|
||||
_mi_heap_default = heap;
|
||||
}
|
||||
|
@ -211,7 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
|
||||
if (mi_atomic_cas_strong(&region->info, info, 0)) {
|
||||
// update the region count
|
||||
region->arena_memid = arena_memid;
|
||||
mi_atomic_write(&region->numa_node, _mi_os_numa_node() + 1);
|
||||
mi_atomic_write(&region->numa_node, _mi_os_numa_node(tld) + 1);
|
||||
mi_atomic_increment(&regions_count);
|
||||
}
|
||||
else {
|
||||
@ -220,7 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
|
||||
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
|
||||
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
|
||||
regions[idx+i].arena_memid = arena_memid;
|
||||
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node() + 1);
|
||||
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
|
||||
mi_atomic_increment(&regions_count);
|
||||
start = NULL;
|
||||
break;
|
||||
@ -430,7 +430,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
|
||||
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
|
||||
|
||||
// find a range of free blocks
|
||||
int numa_node = _mi_os_numa_node();
|
||||
int numa_node = _mi_os_numa_node(tld);
|
||||
void* p = NULL;
|
||||
size_t count = mi_atomic_read(&regions_count);
|
||||
size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
|
||||
|
58
src/os.c
58
src/os.c
@ -884,7 +884,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) {
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
static int mi_os_numa_nodex(void) {
|
||||
static int mi_os_numa_nodex() {
|
||||
PROCESSOR_NUMBER pnum;
|
||||
USHORT numa_node = 0;
|
||||
GetCurrentProcessorNumberEx(&pnum);
|
||||
@ -898,12 +898,42 @@ static int mi_os_numa_node_countx(void) {
|
||||
return (int)(numa_max + 1);
|
||||
}
|
||||
#elif MI_HAS_NUMA
|
||||
#include <numa.h>
|
||||
#include <dirent.h>
|
||||
#include <stdlib.h>
|
||||
#include <numaif.h>
|
||||
static int mi_os_numa_nodex(void) {
|
||||
return numa_preferred();
|
||||
#define MI_MAX_MASK (4) // support at most 256 nodes
|
||||
unsigned long mask[MI_MAX_MASK];
|
||||
memset(mask,0,MI_MAX_MASK*sizeof(long));
|
||||
int mode = 0;
|
||||
long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */);
|
||||
if (err != 0) return 0;
|
||||
// find the lowest bit that is set
|
||||
for(int i = 0; i < MI_MAX_MASK; i++) {
|
||||
for(int j = 0; j < (int)(sizeof(long)*8); j++) {
|
||||
if ((mask[i] & (1UL << j)) != 0) {
|
||||
return (i*sizeof(long)*8 + j);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mi_os_numa_node_countx(void) {
|
||||
return (numa_max_node() + 1);
|
||||
DIR* d = opendir("/sys/devices/system/node");
|
||||
if (d==NULL) return 1;
|
||||
|
||||
struct dirent* de;
|
||||
int max_node_num = 0;
|
||||
while ((de = readdir(d)) != NULL) {
|
||||
int node_num;
|
||||
if (strncmp(de->d_name, "node", 4) == 0) {
|
||||
node_num = (int)strtol(de->d_name+4, NULL, 0);
|
||||
if (max_node_num < node_num) max_node_num = node_num;
|
||||
}
|
||||
}
|
||||
closedir(d);
|
||||
return (max_node_num + 1);
|
||||
}
|
||||
#else
|
||||
static int mi_os_numa_nodex(void) {
|
||||
@ -915,18 +945,28 @@ static int mi_os_numa_node_countx(void) {
|
||||
#endif
|
||||
|
||||
int _mi_os_numa_node_count(void) {
|
||||
long ncount = mi_os_numa_node_countx();
|
||||
static int numa_node_count = 0;
|
||||
if (mi_unlikely(numa_node_count <= 0)) {
|
||||
int ncount = mi_os_numa_node_countx();
|
||||
// never more than max numa node and at least 1
|
||||
long nmax = 1 + mi_option_get(mi_option_max_numa_node);
|
||||
int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node);
|
||||
if (ncount > nmax) ncount = nmax;
|
||||
if (ncount <= 0) ncount = 1;
|
||||
return ncount;
|
||||
numa_node_count = ncount;
|
||||
}
|
||||
mi_assert_internal(numa_node_count >= 1);
|
||||
return numa_node_count;
|
||||
}
|
||||
|
||||
int _mi_os_numa_node(void) {
|
||||
int _mi_os_numa_node(mi_os_tld_t* tld) {
|
||||
if (mi_unlikely(tld->numa_node < 0)) {
|
||||
int nnode = mi_os_numa_nodex();
|
||||
// never more than the node count
|
||||
int ncount = _mi_os_numa_node_count();
|
||||
if (nnode >= ncount) { nnode = nnode % ncount; }
|
||||
return nnode;
|
||||
if (nnode < 0) nnode = 0;
|
||||
tld->numa_node = nnode;
|
||||
}
|
||||
mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count());
|
||||
return tld->numa_node;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user