optimized numa calls; better Linux support

daan 2019-11-01 22:01:52 -07:00
parent 57dd69265a
commit 2c12d7f223
7 changed files with 97 additions and 43 deletions

View File

@@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.0)
project(libmimalloc C CXX)
include("cmake/mimalloc-config-version.cmake")
include("CheckIncludeFile")
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
@@ -88,6 +90,16 @@ if(MI_USE_CXX MATCHES "ON")
set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX )
endif()
CHECK_INCLUDE_FILE("numaif.h" MI_HAVE_NUMA_H)
if(MI_HAVE_NUMA_H)
list(APPEND mi_defines MI_HAS_NUMA)
list(APPEND mi_libraries numa)
else()
if (NOT(WIN32))
message(WARNING "Compiling without using NUMA optimized allocation (on Linux, install libnuma-dev?)")
endif()
endif()
# Compiler flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
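Note: the CHECK_INCLUDE_FILE probe above only defines MI_HAS_NUMA (and links against libnuma) when numaif.h is found, so the sources fall back cleanly when libnuma-dev is not installed. A minimal sketch of that compile-time contract, with a hypothetical helper name (the real guarded code is in src/os.c further down in this diff):

// Illustrative only; mi_numa_available is a hypothetical helper, not mimalloc code.
#ifdef MI_HAS_NUMA
#include <numaif.h>   // get_mempolicy, mbind (provided by libnuma-dev)
static int mi_numa_available(void) { return 1; }
#else
static int mi_numa_available(void) { return 0; }  // built without NUMA affinity
#endif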

View File

@@ -56,7 +56,7 @@ void _mi_os_init(void); // called fro
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
size_t _mi_os_good_alloc_size(size_t size);
int _mi_os_numa_node(void);
int _mi_os_numa_node(mi_os_tld_t* tld);
// memory.c

View File

@@ -413,6 +413,7 @@ typedef struct mi_segments_tld_s {
// OS thread local data
typedef struct mi_os_tld_s {
size_t region_idx; // start point for next allocation
int numa_node; // numa node associated with this thread
mi_stats_t* stats; // points to tld stats
} mi_os_tld_t;
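The new numa_node field acts as a per-thread cache: it is initialized to -1 (see the init.c hunk below) and resolved on first use, so the OS is asked for the current NUMA node at most once per thread rather than on every region or arena allocation. A minimal sketch of that sentinel-plus-lazy-init pattern, assuming a hypothetical query_numa_node_from_os(); the real version is _mi_os_numa_node at the end of this diff:

// Illustrative sketch, not mimalloc code.
static int query_numa_node_from_os(void);      // hypothetical OS-specific query

static int os_tld_numa_node(mi_os_tld_t* tld) {
  if (tld->numa_node < 0) {                    // -1: not resolved for this thread yet
    tld->numa_node = query_numa_node_from_os();
  }
  return tld->numa_node;                       // subsequent calls are a plain field read
}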

View File

@@ -267,7 +267,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
{
size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
int numa_node = _mi_os_numa_node(); // current numa node
int numa_node = _mi_os_numa_node(tld); // current numa node
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
// try numa affine allocation

View File

@@ -99,7 +99,7 @@ static mi_tld_t tld_main = {
0, false,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
{ 0, tld_main_stats }, // os
{ 0, -1, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
@@ -218,6 +218,7 @@ static bool _mi_heap_init(void) {
memset(tld, 0, sizeof(*tld));
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->os.numa_node = -1;
tld->os.stats = &tld->stats;
_mi_heap_default = heap;
}

View File

@@ -211,7 +211,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
if (mi_atomic_cas_strong(&region->info, info, 0)) {
// update the region count
region->arena_memid = arena_memid;
mi_atomic_write(&region->numa_node, _mi_os_numa_node() + 1);
mi_atomic_write(&region->numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
}
else {
@@ -220,7 +220,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
regions[idx+i].arena_memid = arena_memid;
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node() + 1);
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
start = NULL;
break;
@@ -430,7 +430,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);
// find a range of free blocks
int numa_node = _mi_os_numa_node();
int numa_node = _mi_os_numa_node(tld);
void* p = NULL;
size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
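Note the + 1 when the node is stored into region->numa_node above: regions are zero-initialized, so 0 can mean "no node recorded yet" while node n is stored as n + 1 (the real code uses the project's atomic wrappers). A tiny sketch of that encoding with hypothetical helpers:

// Illustrative encode/decode, not mimalloc code: 0 means "unset", node n is stored as n+1.
#include <stdint.h>
static void region_numa_set(uintptr_t* slot, int node) { *slot = (uintptr_t)(node + 1); }
static int  region_numa_get(const uintptr_t* slot)     { return (int)*slot - 1; } // -1 if unset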

src/os.c
View File

@@ -97,7 +97,7 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*
static PVirtualAlloc2 pVirtualAlloc2 = NULL;
static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
static bool mi_win_enable_large_os_pages()
{
if (large_os_page_size > 0) return true;
@@ -148,10 +148,10 @@ void _mi_os_init(void) {
FreeLibrary(hDll);
}
hDll = LoadLibrary(TEXT("ntdll.dll"));
if (hDll != NULL) {
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
FreeLibrary(hDll);
}
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
mi_win_enable_large_os_pages();
}
@@ -191,7 +191,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
#else
err = (munmap(addr, size) == -1);
#endif
if (was_committed) _mi_stat_decrease(&stats->committed, size);
_mi_stat_decrease(&stats->reserved, size);
if (err) {
#pragma warning(suppress:4996)
@@ -207,14 +207,14 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
#ifdef _WIN32
static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
#if (MI_INTPTR_SIZE >= 8)
// on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations
void* hint;
if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) {
return VirtualAlloc(hint, size, flags, PAGE_READWRITE);
}
#endif
#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
// on modern Windows try to use VirtualAlloc2 for aligned allocation
if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
MEM_ADDRESS_REQUIREMENTS reqs = { 0 };
@@ -232,7 +232,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
mi_assert_internal(!(large_only && !allow_large));
static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0;
void* p = NULL;
if ((large_only || use_large_os_page(size, try_alignment))
&& allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) {
uintptr_t try_ok = mi_atomic_read(&large_page_try_ok);
if (!large_only && try_ok > 0) {
@@ -372,7 +372,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
}
if (p == NULL) {
*is_large = false;
p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
#if defined(MADV_HUGEPAGE)
// Many Linux systems don't allow MAP_HUGETLB but they support instead
// transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
@@ -391,7 +391,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
}
#endif
// On 64-bit systems, we can do efficient aligned allocation by using
// the 4TiB to 30TiB area to allocate them.
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
static volatile _Atomic(intptr_t) aligned_base;
@@ -785,14 +785,14 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {
/* ----------------------------------------------------------------------------
Support for allocating huge OS pages (1GiB) that are reserved up-front
and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE (GiB)
#if defined(WIN32) && (MI_INTPTR_SIZE >= 8)
static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node)
{
mi_assert_internal(size%GiB == 0);
#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
@@ -802,8 +802,8 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node)
reqs.HighestEndingAddress = NULL;
reqs.LowestStartingAddress = NULL;
reqs.Alignment = MI_SEGMENT_SIZE;
// on modern Windows try to use NtAllocateVirtualMemoryEx for 1GiB huge pages
if (pNtAllocateVirtualMemoryEx != NULL) {
#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10)
@@ -825,10 +825,10 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node)
return base;
}
else {
// fall back to regular huge pages
_mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (error 0x%lx)\n", err);
}
}
// on modern Windows try to use VirtualAlloc2 for aligned large OS page allocation
if (pVirtualAlloc2 != NULL) {
params[0].Type = MemExtendedParameterAddressRequirements;
@@ -842,7 +842,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node)
return (*pVirtualAlloc2)(GetCurrentProcess(), NULL, size, flags, PAGE_READWRITE, params, param_count);
}
#endif
return NULL; // give up on older Windows..
}
#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
#ifdef MI_HAS_NUMA
@@ -853,7 +853,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) {
bool is_large = true;
void* p = mi_unix_mmap(NULL, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
if (p == NULL) return NULL;
#ifdef MI_HAS_NUMA
if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) {
uintptr_t numa_mask = (1UL << numa_node);
long err = mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
@@ -866,7 +866,7 @@ static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) {
#endif
return p;
}
#else
static void* mi_os_alloc_huge_os_pagesx(size_t size, int numa_node) {
return NULL;
}
@@ -884,12 +884,12 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, size_t* psize) {
}
#ifdef WIN32
static int mi_os_numa_nodex(void) {
static int mi_os_numa_nodex() {
PROCESSOR_NUMBER pnum;
USHORT numa_node = 0;
GetCurrentProcessorNumberEx(&pnum);
GetNumaProcessorNodeEx(&pnum,&numa_node);
return (int)numa_node;
}
static int mi_os_numa_node_countx(void) {
@@ -898,12 +898,42 @@ static int mi_os_numa_node_countx(void) {
return (int)(numa_max + 1);
}
#elif MI_HAS_NUMA
#include <numa.h>
#include <dirent.h>
#include <stdlib.h>
#include <numaif.h>
static int mi_os_numa_nodex(void) {
return numa_preferred();
#define MI_MAX_MASK (4) // support at most 256 nodes
unsigned long mask[MI_MAX_MASK];
memset(mask,0,MI_MAX_MASK*sizeof(long));
int mode = 0;
long err = get_mempolicy(&mode, mask, MI_MAX_MASK*sizeof(long)*8, NULL, 0 /* thread policy */);
if (err != 0) return 0;
// find the lowest bit that is set
for(int i = 0; i < MI_MAX_MASK; i++) {
for(int j = 0; j < (int)(sizeof(long)*8); j++) {
if ((mask[i] & (1UL << j)) != 0) {
return (i*sizeof(long)*8 + j);
}
}
}
return 0;
}
static int mi_os_numa_node_countx(void) {
return (numa_max_node() + 1);
DIR* d = opendir("/sys/devices/system/node");
if (d==NULL) return 1;
struct dirent* de;
int max_node_num = 0;
while ((de = readdir(d)) != NULL) {
int node_num;
if (strncmp(de->d_name, "node", 4) == 0) {
node_num = (int)strtol(de->d_name+4, NULL, 0);
if (max_node_num < node_num) max_node_num = node_num;
}
}
closedir(d);
return (max_node_num + 1);
}
#else
static int mi_os_numa_nodex(void) {
@@ -915,18 +945,28 @@ static int mi_os_numa_node_countx(void) {
#endif
int _mi_os_numa_node_count(void) {
long ncount = mi_os_numa_node_countx();
// never more than max numa node and at least 1
long nmax = 1 + mi_option_get(mi_option_max_numa_node);
if (ncount > nmax) ncount = nmax;
if (ncount <= 0) ncount = 1;
return ncount;
static int numa_node_count = 0;
if (mi_unlikely(numa_node_count <= 0)) {
int ncount = mi_os_numa_node_countx();
// never more than max numa node and at least 1
int nmax = 1 + (int)mi_option_get(mi_option_max_numa_node);
if (ncount > nmax) ncount = nmax;
if (ncount <= 0) ncount = 1;
numa_node_count = ncount;
}
mi_assert_internal(numa_node_count >= 1);
return numa_node_count;
}
int _mi_os_numa_node(void) {
int nnode = mi_os_numa_nodex();
// never more than the node count
int ncount = _mi_os_numa_node_count();
if (nnode >= ncount) { nnode = nnode % ncount; }
return nnode;
int _mi_os_numa_node(mi_os_tld_t* tld) {
if (mi_unlikely(tld->numa_node < 0)) {
int nnode = mi_os_numa_nodex();
// never more than the node count
int ncount = _mi_os_numa_node_count();
if (nnode >= ncount) { nnode = nnode % ncount; }
if (nnode < 0) nnode = 0;
tld->numa_node = nnode;
}
mi_assert_internal(tld->numa_node >= 0 && tld->numa_node < _mi_os_numa_node_count());
return tld->numa_node;
}
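For reference, the new Linux paths can be exercised outside of mimalloc: get_mempolicy (with a NULL address and no flags) returns the calling thread's memory-policy node mask, and the node count comes from scanning /sys/devices/system/node for nodeN entries. A standalone sketch along those lines (assumes Linux with numaif.h available and linking against libnuma; not part of this commit):

// Standalone sketch mirroring the approach above; hypothetical program, Linux only.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <numaif.h>                                 // get_mempolicy

static int current_policy_node(void) {
  unsigned long mask[4] = { 0 };                    // room for 256 nodes on 64-bit
  int mode = 0;
  if (get_mempolicy(&mode, mask, sizeof(mask)*8, NULL, 0) != 0) return 0;
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < (int)(sizeof(long)*8); j++) {
      if (mask[i] & (1UL << j)) return (int)(i*sizeof(long)*8 + j);  // lowest set bit
    }
  }
  return 0;                                         // empty mask: default policy, use node 0
}

static int sysfs_node_count(void) {
  DIR* d = opendir("/sys/devices/system/node");
  if (d == NULL) return 1;                          // no NUMA info exposed: assume one node
  int max_node = 0;
  struct dirent* de;
  while ((de = readdir(d)) != NULL) {
    if (strncmp(de->d_name, "node", 4) == 0) {      // entries are named node0, node1, ...
      int n = (int)strtol(de->d_name + 4, NULL, 0);
      if (n > max_node) max_node = n;
    }
  }
  closedir(d);
  return max_node + 1;
}

int main(void) {
  printf("current node: %d, node count: %d\n", current_policy_node(), sysfs_node_count());
  return 0;
}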