merge dev-exp
commit 181bef382c
@@ -10,6 +10,7 @@ option(MI_SEE_ASM          "Generate assembly files" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF)
option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_TESTS "Build test executables" ON)

@@ -70,9 +71,15 @@ if(MI_OVERRIDE MATCHES "ON")
  endif()
endif()

if(MI_SECURE MATCHES "ON")
if(MI_SECURE_FULL MATCHES "ON")
  message(STATUS "Set full secure build (may be more expensive) (MI_SECURE_FULL=ON)")
  list(APPEND mi_defines MI_SECURE=4)
  set(MI_SECURE "ON")
else()
  if(MI_SECURE MATCHES "ON")
    message(STATUS "Set secure build (MI_SECURE=ON)")
    list(APPEND mi_defines MI_SECURE=3)
  endif()
endif()

if(MI_SEE_ASM MATCHES "ON")
@@ -123,7 +123,7 @@
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<PreprocessorDefinitions>MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>Default</CompileAs>
@@ -232,6 +232,9 @@
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.inc.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\memory.c" />
@@ -116,7 +116,7 @@
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_DEBUG=1;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp17</LanguageStandard>
@@ -218,6 +218,9 @@
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.inc.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\memory.c" />
@@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add);
// Atomically add a value; returns the previous value. Memory ordering is relaxed.
static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add);

// Atomically "and" a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x);

// Atomically "or" a value; returns the previous value. Memory ordering is relaxed.
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x);

// Atomically compare and exchange a value; returns `true` if successful.
// May fail spuriously. Memory ordering as release on success, and relaxed on failure.
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
@@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
#define RC64(f) f##64
#define MI_64(f) f##64
#else
typedef LONG msc_intptr_t;
#define RC64(f) f
#define MI_64(f) f
#endif
static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return mi_atomic_cas_strong(p,desired,expected);
}
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
return *p;
@@ -177,6 +190,14 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add
MI_USING_STD
return atomic_fetch_add_explicit(p, add, memory_order_relaxed);
}
static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_fetch_and_explicit(p, x, memory_order_relaxed);
}
static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
MI_USING_STD
return atomic_fetch_or_explicit(p, x, memory_order_relaxed);
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
MI_USING_STD
return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed);
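For illustration only (not part of this commit): the new mi_atomic_and/mi_atomic_or primitives are declared to return the previous value, which is what lets a caller atomically claim or release a group of bits and know whether it succeeded. A minimal standalone sketch of that pattern, using only C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic(uintptr_t) field = 0;

// claim `mask` bits: only succeeded if none of them were set before
static bool claim(uintptr_t mask) {
  uintptr_t prev = atomic_fetch_or_explicit(&field, mask, memory_order_relaxed);
  return ((prev & mask) == 0);
}

// release `mask` bits: true if they were all set before
static bool release(uintptr_t mask) {
  uintptr_t prev = atomic_fetch_and_explicit(&field, ~mask, memory_order_relaxed);
  return ((prev & mask) == mask);
}

int main(void) {
  printf("claim 0x0f: %d\n", claim(0x0F));      // 1: bits were free
  printf("claim 0x03: %d\n", claim(0x03));      // 0: already taken
  printf("release 0x0f: %d\n", release(0x0F));  // 1: all bits were set
  return 0;
}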
@@ -162,7 +162,6 @@ bool _mi_page_is_valid(mi_page_t* page);

// Overflow detecting multiply
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#include <limits.h> // UINT_MAX, ULONG_MAX
@@ -174,6 +173,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
return __builtin_umulll_overflow(count, size, total);
#endif
#else /* __builtin_umul_overflow is unavailable */
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
*total = count * size;
return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
&& size > 0 && (SIZE_MAX / size) < count);
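For illustration only: the portable fallback above, as a standalone sketch. MI_MUL_NO_OVERFLOW is sqrt(SIZE_MAX), so when both factors are below it the product cannot overflow and the (slower) division test is skipped entirely:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)

static bool mul_overflow(size_t count, size_t size, size_t* total) {
  *total = count * size;  // may wrap; the check below detects that case
  return ((size >= NO_OVERFLOW || count >= NO_OVERFLOW)
          && size > 0 && (SIZE_MAX / size) < count);
}

int main(void) {
  size_t total;
  printf("%d\n", mul_overflow(1000, 16, &total));       // 0: total == 16000
  printf("%d\n", mul_overflow(SIZE_MAX/2, 4, &total));  // 1: overflow detected
  return 0;
}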
@@ -187,6 +187,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) {

// Align upwards
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
mi_assert_internal(alignment != 0);
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { // power of two?
return ((sz + mask) & ~mask);
@@ -196,6 +197,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
}
}

// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
mi_assert_internal(divider != 0);
return (divider == 0 ? size : ((size + divider - 1) / divider));
}
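For illustration only: the intended behaviour of _mi_align_up and the new _mi_divide_up helper, shown with standalone copies of the same expressions:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static uintptr_t align_up(uintptr_t sz, size_t alignment) {  // power-of-two case
  uintptr_t mask = alignment - 1;
  return (sz + mask) & ~mask;
}
static uintptr_t divide_up(uintptr_t size, size_t divider) {
  return (size + divider - 1) / divider;
}

int main(void) {
  assert(align_up(13, 8) == 16);   // round 13 up to a multiple of 8
  assert(align_up(16, 8) == 16);   // already aligned
  assert(divide_up(9, 4) == 3);    // 9 <= 3*4 < 9+4, matching the comment above
  assert(divide_up(8, 4) == 2);
  return 0;
}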

// Is memory zero initialized?
static inline bool mi_mem_is_zero(void* p, size_t size) {
for (size_t i = 0; i < size; i++) {
@@ -282,7 +289,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
// if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE);
mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE);
uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
mi_assert_internal(idx < segment->capacity);
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);

@@ -26,7 +26,7 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free.
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free. (cmake -DMI_SECURE_FULL=ON)

#if !defined(MI_SECURE)
#define MI_SECURE 0
@@ -35,7 +35,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_DEBUG for debug mode
// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
// #define MI_DEBUG 2 // + internal assertion checks
// #define MI_DEBUG 3 // + extensive internal invariant checking
// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_CHECK_FULL=ON)
#if !defined(MI_DEBUG)
#if !defined(NDEBUG) || defined(_DEBUG)
#define MI_DEBUG 2
@@ -93,12 +93,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb

// Derived constants
#define MI_SEGMENT_SIZE (1<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_SIZE (1UL<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_MASK ((uintptr_t)MI_SEGMENT_SIZE - 1)

#define MI_SMALL_PAGE_SIZE (1<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (1<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (1<<MI_LARGE_PAGE_SHIFT)
#define MI_SMALL_PAGE_SIZE (1UL<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (1UL<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (1UL<<MI_LARGE_PAGE_SHIFT)

#define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)

@@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
mi_decl_export bool mi_is_redirected() mi_attr_noexcept;

mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;

// deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
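For illustration only (not part of this commit): how an application might call the changed public API, which now takes a timeout in milliseconds instead of deriving one internally; the functions return 0 on success and ENOMEM on failure:

#include "mimalloc.h"
#include <stdio.h>

int main(void) {
  // spread 4 one-GiB huge pages over the NUMA nodes, giving up after ~2 seconds
  int err = mi_reserve_huge_os_pages_interleave(4, 2000);
  if (err != 0) fprintf(stderr, "interleaved reservation failed: %d\n", err);

  // or pin 2 huge pages to NUMA node 0 explicitly, with a 1 second timeout
  err = mi_reserve_huge_os_pages_at(2, 0, 1000);
  if (err != 0) fprintf(stderr, "reservation on node 0 failed: %d\n", err);
  return 0;
}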

src/arena.c
@@ -7,15 +7,23 @@ terms of the MIT license. A copy of the license can be found in the file

/* ----------------------------------------------------------------------------
"Arenas" are fixed areas of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to
allocate in one arena consisting of huge OS pages -- otherwise it
delegates to direct allocation from the OS.
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.

In the future, we can expose an API to manually add more arenas which
is sometimes needed for embedded devices or shared memory for example.
Currently arenas are only used for huge OS page (1GiB) reservations,
otherwise it delegates to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas
which is sometimes needed for embedded devices or shared memory for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).

The arena allocation needs to be thread safe and we use a lock-free scan
with on-demand coalescing.
The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
only do this within a field (`uintptr_t`) so we can allocate at most
blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
can lead to fragmentation but fortunately most objects will be regions
of 256MiB in practice.
-----------------------------------------------------------------------------*/
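For illustration only: the size arithmetic the comment above describes, worked out for a 64-bit build where segments are 4MiB:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint64_t segment_size = 4ULL << 20;              // 4MiB segments
  const uint64_t block_size   = 8 * segment_size;        // MI_ARENA_BLOCK_SIZE = 32MiB
  const uint64_t field_bits   = 64;                      // bits in one uintptr_t bitmap field
  const uint64_t max_obj      = field_bits * block_size; // 2GiB: a claim never crosses a field
  printf("block %llu MiB, max arena object %llu GiB\n",
         (unsigned long long)(block_size >> 20),
         (unsigned long long)(max_obj >> 30));
  return 0;
}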
#include "mimalloc.h"
#include "mimalloc-internal.h"
@@ -23,6 +31,8 @@ with on-demand coalescing.

#include <string.h>  // memset

#include "bitmap.inc.c"  // atomic bitmap

// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
@@ -37,22 +47,22 @@ int _mi_os_numa_node_count(void);
----------------------------------------------------------- */

#define MI_SEGMENT_ALIGN     MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE  (4*MI_SEGMENT_ALIGN)     // 16MiB
#define MI_MAX_ARENAS        (64)

// Block info: bit 0 contains the `in_use` bit, the upper bits the
// size in count of arena blocks.
typedef uintptr_t mi_block_info_t;
#define MI_ARENA_BLOCK_SIZE    (8*MI_SEGMENT_ALIGN)   // 32MiB
#define MI_ARENA_MAX_OBJ_SIZE  (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE)  // 2GiB
#define MI_ARENA_MIN_OBJ_SIZE  (MI_ARENA_BLOCK_SIZE/2)  // 16MiB
#define MI_MAX_ARENAS          (64)                   // not more than 256 (since we use 8 bits in the memid)

// A memory arena descriptor
typedef struct mi_arena_s {
uint8_t* start;      // the start of the memory area
size_t   block_count;  // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t   field_count;  // number of bitmap fields
int      numa_node;    // associated NUMA node
bool     is_zero_init; // is the arena zero initialized?
bool     is_large;     // large OS page allocated
_Atomic(uintptr_t) block_bottom;   // optimization to start the search for free blocks
_Atomic(mi_block_info_t) blocks[1]; // `block_count` block info's
volatile _Atomic(uintptr_t) search_idx;  // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty;   // are the blocks potentially non-zero?
mi_bitmap_field_t  blocks_map[1];  // bitmap of in-use blocks
} mi_arena_t;

@@ -69,184 +79,57 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0
// Use `0` as a special id for direct OS allocated memory.
#define MI_MEMID_OS 0

static size_t mi_memid_create(size_t arena_index, size_t block_index) {
static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
mi_assert_internal(arena_index < 0xFE);
return ((block_index << 8) | ((arena_index+1) & 0xFF));
mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}

static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) {
static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
mi_assert_internal(memid != MI_MEMID_OS);
*arena_index = (memid & 0xFF) - 1;
*block_index = (memid >> 8);
*bitmap_index = (memid >> 8);
}
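For illustration only: the memid encoding used above -- the arena index (+1) in the low 8 bits, the bitmap index in the bits above -- shown as a standalone round trip:

#include <assert.h>
#include <stddef.h>

static size_t memid_create(size_t arena_index, size_t bitmap_index) {
  return (bitmap_index << 8) | ((arena_index + 1) & 0xFF);
}
static void memid_indices(size_t memid, size_t* arena_index, size_t* bitmap_index) {
  *arena_index  = (memid & 0xFF) - 1;
  *bitmap_index = (memid >> 8);
}

int main(void) {
  size_t a, b;
  memid_indices(memid_create(3, 170), &a, &b);
  assert(a == 3 && b == 170);      // round trips while the bitmap index fits above bit 8
  assert(memid_create(0, 0) != 0); // id 0 stays reserved for MI_MEMID_OS
  return 0;
}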

/* -----------------------------------------------------------
Block info
----------------------------------------------------------- */

static bool mi_block_is_in_use(mi_block_info_t info) {
return ((info&1) != 0);
static size_t mi_block_count_of_size(size_t size) {
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}

static size_t mi_block_count(mi_block_info_t info) {
return (info>>1);
}

static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) {
return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0));
}

/* -----------------------------------------------------------
Thread safe allocation in an arena
----------------------------------------------------------- */

static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index)
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
// Scan linearly through all block info's
// Skipping used ranges, coalescing free ranges on demand.
mi_assert_internal(needed_bcount > 0);
mi_assert_internal(start_idx <= arena->block_count);
mi_assert_internal(end_idx <= arena->block_count);
_Atomic(mi_block_info_t)* block = &arena->blocks[start_idx];
_Atomic(mi_block_info_t)* end = &arena->blocks[end_idx];
while (block < end) {
mi_block_info_t binfo = mi_atomic_read_relaxed(block);
size_t bcount = mi_block_count(binfo);
if (mi_block_is_in_use(binfo)) {
// in-use, skip ahead
mi_assert_internal(bcount > 0);
block += bcount;
}
else {
// free blocks
if (bcount==0) {
// optimization:
// use 0 initialized blocks at the end, to use single atomic operation
// initially to reduce contention (as we don't need to split)
if (block + needed_bcount > end) {
return NULL; // does not fit
}
else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) {
// ouch, someone else was quicker. Try again..
continue;
}
else {
// we got it: return a pointer to the claimed memory
ptrdiff_t idx = (block - arena->blocks);
*is_zero = arena->is_zero_init;
*block_index = idx;
return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
const size_t fcount = arena->field_count;
size_t idx = mi_atomic_read(&arena->search_idx);  // start from last search
for (size_t visited = 0; visited < fcount; visited++, idx++) {
if (idx >= fcount) idx = 0;  // wrap around
if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) {
mi_atomic_write(&arena->search_idx, idx);  // start search from here next time
return true;
}
}

mi_assert_internal(bcount>0);
if (needed_bcount > bcount) {
#if 0 // MI_NO_ARENA_COALESCE
block += bcount; // too small, skip to the next range
continue;
#else
// too small, try to coalesce
_Atomic(mi_block_info_t)* block_next = block + bcount;
if (block_next >= end) {
return NULL; // does not fit
}
mi_block_info_t binfo_next = mi_atomic_read(block_next);
size_t bcount_next = mi_block_count(binfo_next);
if (mi_block_is_in_use(binfo_next)) {
// next block is in use, cannot coalesce
block += (bcount + bcount_next); // skip ahead over both blocks
}
else {
// next block is free, try to coalesce
// first set the next one to being used to prevent dangling ranges
if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) {
// someone else got in before us.. try again
continue;
}
else {
if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) { // use strong to increase success chance
// someone claimed/coalesced the block in the meantime
// first free the next block again..
bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong
mi_assert(ok); UNUSED(ok);
// and try again
continue;
}
else {
// coalesced! try again
// todo: we could optimize here to immediately claim the block if the
// coalesced size is a fit instead of retrying. Keep it simple for now.
continue;
}
}
}
#endif
}
else { // needed_bcount <= bcount
mi_assert_internal(needed_bcount <= bcount);
// it fits, claim the whole block
if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) {
// ouch, someone else was quicker. Try again..
continue;
}
else {
// got it, now split off the needed part
if (needed_bcount < bcount) {
mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false));
mi_atomic_write(block, mi_block_info_create(needed_bcount, true));
}
// return a pointer to the claimed memory
ptrdiff_t idx = (block - arena->blocks);
*is_zero = false;
*block_index = idx;
return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
}
}
}
}
// no success
return NULL;
return false;
}

// Try to reduce search time by starting from bottom and wrap around.
static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index)
{
uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom);
void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index);
if (p == NULL && bottom > 0) {
// try again from the start
p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index);
}
if (p != NULL) {
mi_atomic_write(&arena->block_bottom, *block_index);
}
return p;
}

/* -----------------------------------------------------------
Arena Allocation
----------------------------------------------------------- */

static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_zero,
size_t* memid)
bool* commit, bool* large, bool* is_zero, size_t* memid)
{
size_t block_index = SIZE_MAX;
void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index);
if (p != NULL) {
mi_assert_internal(block_index != SIZE_MAX);
#if MI_DEBUG>=1
_Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
mi_block_info_t binfo = mi_atomic_read(block);
mi_assert_internal(mi_block_is_in_use(binfo));
mi_assert_internal(mi_block_count(binfo) >= needed_bcount);
#endif
*memid = mi_memid_create(arena_index, block_index);
mi_bitmap_index_t bitmap_index;
if (mi_arena_alloc(arena, needed_bcount, &bitmap_index)) {
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*memid = mi_memid_create(arena_index, bitmap_index);
*commit = true; // TODO: support commit on demand?
*large = arena->is_large;
return (arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE));
}
return p;
return NULL;
}

void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
@@ -261,15 +144,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
if (large==NULL) large = &default_large; // ensure `large != NULL`

// try to allocate in an arena if the alignment is small enough
// and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`.
// and the object is not too large or too small.
if (alignment <= MI_SEGMENT_ALIGN &&
size >= 3*(MI_ARENA_BLOCK_SIZE/4) &&  // > 12MiB (not more than 25% waste)
!(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2))  // ! <16MiB - 24MiB>
)
size <= MI_ARENA_MAX_OBJ_SIZE &&
size >= MI_ARENA_MIN_OBJ_SIZE)
{
size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
int numa_node = _mi_os_numa_node(tld); // current numa node
const size_t bcount = mi_block_count_of_size(size);
const int numa_node = _mi_os_numa_node(tld); // current numa node

mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
// try numa affine allocation
@@ -301,6 +182,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
// finally, fall back to the OS
*is_zero = true;
*memid = MI_MEMID_OS;
if (*large) {
*large = mi_option_is_enabled(mi_option_large_os_pages); // try large OS pages only if enabled and allowed
}
return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
}

@@ -324,8 +208,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
else {
// allocated in an arena
size_t arena_idx;
size_t block_idx;
mi_memid_indices(memid, &arena_idx, &block_idx);
size_t bitmap_idx;
mi_memid_indices(memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
mi_assert_internal(arena != NULL);
@@ -333,27 +217,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
_mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
mi_assert_internal(arena->block_count > block_idx);
if (arena->block_count <= block_idx) {
_mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
_mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
_Atomic(mi_block_info_t)* block = &arena->blocks[block_idx];
mi_block_info_t binfo = mi_atomic_read_relaxed(block);
mi_assert_internal(mi_block_is_in_use(binfo));
mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
if (!mi_block_is_in_use(binfo)) {
const size_t blocks = mi_block_count_of_size(size);
bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx);
if (!ones) {
_mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
return;
};
bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo);
mi_assert_internal(ok);
if (!ok) {
_mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo);
}
if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) {
mi_atomic_write(&arena->block_bottom, block_idx);
}
}
}

@@ -365,7 +239,6 @@ static bool mi_arena_add(mi_arena_t* arena) {
mi_assert_internal(arena != NULL);
mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
mi_assert_internal(arena->block_count > 0);
mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t)));

uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
if (i >= MI_MAX_ARENAS) {
@@ -383,40 +256,51 @@ static bool mi_arena_add(mi_arena_t* arena) {
#include <errno.h> // ENOMEM

// reserve at a specific numa node
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept {
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
if (pages==0) return 0;
if (numa_node < -1) numa_node = -1;
if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
size_t hsize = 0;
size_t pages_reserved = 0;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize);
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
if (p==NULL || pages_reserved==0) {
_mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
return ENOMEM;
}
_mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);

size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t)); // one too much
size_t bcount = mi_block_count_of_size(hsize);
size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
if (arena == NULL) {
_mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
return ENOMEM;
}
arena->block_count = bcount;
arena->field_count = fields;
arena->start = (uint8_t*)p;
arena->block_bottom = 0;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = true;
arena->is_zero_init = true;
memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t));
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_map[bcount];
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
if (post > 0) {
// don't use leftover bits at the end
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
mi_bitmap_claim(arena->blocks_map, fields, post, postidx, NULL);
}

mi_arena_add(arena);
return 0;
}
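For illustration only: the leftover-bit calculation above. When the reserved area is not a multiple of the field width, the trailing bits of the last bitmap field are pre-claimed so the allocator never hands them out:

#include <stddef.h>
#include <stdio.h>

int main(void) {
  const size_t field_bits = 64;    // MI_BITMAP_FIELD_BITS on a 64-bit build
  const size_t bcount = 100;       // e.g. 100 arena blocks of 32MiB
  const size_t fields = (bcount + field_bits - 1) / field_bits;  // 2 fields
  const size_t post   = fields * field_bits - bcount;            // 28 unusable trailing bits
  printf("fields=%zu, pre-claimed trailing bits=%zu (starting at bit %zu of field %zu)\n",
         fields, post, field_bits - post, fields - 1);
  return 0;
}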

// reserve huge pages evenly among all numa nodes.
int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept {
if (pages == 0) return 0;

// pages per numa node
@@ -424,12 +308,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
if (numa_count <= 0) numa_count = 1;
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs / numa_count) + 50;

// reserve evenly among numa nodes
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if ((size_t)numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node);
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;
@@ -446,7 +331,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
UNUSED(max_secs);
_mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
if (pages_reserved != NULL) *pages_reserved = 0;
int err = mi_reserve_huge_os_pages_interleave(pages);
int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0));
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
return err;
}
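For illustration only: how the interleaved reservation above splits the work. With 10 pages over 4 NUMA nodes, the first 10%4=2 nodes get an extra page, and the total timeout is divided per node plus a small 50ms slack:

#include <stddef.h>
#include <stdio.h>

int main(void) {
  const size_t pages = 10, numa_count = 4, timeout_msecs = 4000;
  const size_t pages_per   = pages / numa_count;              // 2
  const size_t pages_mod   = pages % numa_count;              // 2
  const size_t timeout_per = timeout_msecs / numa_count + 50; // 1050ms per node
  for (size_t node = 0; node < numa_count; node++) {
    size_t node_pages = pages_per + (node < pages_mod ? 1 : 0);
    printf("node %zu: %zu pages, timeout %zums\n", node, node_pages, timeout_per);
  }
  return 0;  // prints 3,3,2,2 pages for nodes 0..3
}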

src/bitmap.inc.c (new file)
@@ -0,0 +1,208 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
This file is meant to be included in other files for efficiency.
It implements a bitmap that can set/reset sequences of bits atomically
and is used to concurrently claim memory ranges.

A bitmap is an array of fields where each field is a machine word (`uintptr_t`)

A current limitation is that the bit sequences cannot cross fields
and that the sequence must be smaller or equal to the bits in a field.
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_C
#define MI_BITMAP_C

#include "mimalloc.h"
#include "mimalloc-internal.h"

/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */

#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE)
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set

// An atomic bitmap of `uintptr_t` fields
typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;

// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;

// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}

// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}

// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}

// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}

// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
return ((((uintptr_t)1 << count) - 1) << bitidx);
}
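For illustration only: what mi_bitmap_mask_ produces -- `count` one-bits starting at `bitidx` -- using the same expression in a standalone check (the full-field special case avoids an undefined shift by the word width):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static uintptr_t bitmap_mask(size_t count, size_t bitidx) {
  if (count == 8*sizeof(uintptr_t)) return ~((uintptr_t)0);  // full field
  return (((uintptr_t)1 << count) - 1) << bitidx;
}

int main(void) {
  assert(bitmap_mask(4, 0) == 0x0F);    // 4 blocks at the start of a field
  assert(bitmap_mask(4, 8) == 0xF00);   // the same 4 blocks, shifted to bit 8
  assert(bitmap_mask(8*sizeof(uintptr_t), 0) == ~((uintptr_t)0));  // whole field
  return 0;
}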

/* -----------------------------------------------------------
Use bit scan forward/reverse to quickly find the first zero bit if it is available
----------------------------------------------------------- */
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanForward)(&idx, x);
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanReverse)(&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_BITSCAN
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
}
#endif

/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */

// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
volatile _Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_read(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut

// search for 0-bit sequence of length count
const uintptr_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;

#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map);    // quickly find the first zero bit if possible
#else
size_t bitidx = 0;               // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx);  // invariant: m == mask shifted by bitidx

// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
if ((map & m) == 0) {  // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(field, newmap, map)) {  // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
map = mi_atomic_read(field);
continue;
}
else {
// success, we claimed the bits!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
return true;
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no bits found
return false;
}

// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) {
for (size_t idx = 0; idx < bitmap_fields; idx++) {
if (mi_bitmap_try_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
return false;
}
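For illustration only: the claim/release cycle the functions above implement, mirrored with plain C11 atomics on a tiny two-field bitmap (the real code additionally skips ahead with bsf/bsr and never lets a claim cross a field):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define FIELD_BITS (8*sizeof(uintptr_t))
static _Atomic(uintptr_t) bitmap[2];

static bool try_claim_field(size_t idx, size_t count, size_t* bit_idx) {
  uintptr_t mask = (count == FIELD_BITS) ? ~(uintptr_t)0 : (((uintptr_t)1 << count) - 1);
  uintptr_t map = atomic_load(&bitmap[idx]);
  for (size_t bit = 0; bit + count <= FIELD_BITS; bit++) {
    uintptr_t m = mask << bit;
    if ((map & m) == 0) {  // free range found at `bit`
      if (atomic_compare_exchange_weak(&bitmap[idx], &map, map | m)) {
        *bit_idx = idx*FIELD_BITS + bit;
        return true;
      }
      bit = (size_t)-1;    // raced with another thread: `map` was refreshed, restart the scan
    }
  }
  return false;
}

static bool unclaim(size_t bit_idx, size_t count) {
  uintptr_t mask = (((uintptr_t)1 << count) - 1) << (bit_idx % FIELD_BITS);
  uintptr_t prev = atomic_fetch_and(&bitmap[bit_idx / FIELD_BITS], ~mask);
  return (prev & mask) == mask;  // true if the bits were all set before
}

int main(void) {
  size_t bi;
  assert(try_claim_field(0, 4, &bi) && bi == 0);
  assert(try_claim_field(0, 4, &bi) && bi == 4);  // next free range in field 0
  assert(unclaim(0, 4));                          // release the first claim
  return 0;
}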

// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously
static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
mi_assert_internal((bitmap[idx] & mask) == mask);
uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}

// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == 0);
uintptr_t prev = mi_atomic_or(&bitmap[idx], mask);
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
return ((prev & mask) == 0);
}

// Returns `true` if all `count` bits were 1
static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == 0);
return ((mi_atomic_read(&bitmap[idx]) & mask) == mask);
}

#endif

@@ -436,7 +436,7 @@ static void mi_process_load(void) {

if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages);
mi_reserve_huge_os_pages_interleave(pages, pages*500);
}
}

src/memory.c
@@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses in that
object :-( (`malloc-large` tests this). This means we need a cheaper way to
reuse memory.
3. This layer can help with a NUMA aware allocation in the future.
an OS allocation/free is still (much) too expensive relative to the accesses
in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.

Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
@@ -37,6 +37,8 @@ Possible issues:

#include <string.h>  // memset

#include "bitmap.inc.c"

// Internal raw OS interface
size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size);
@@ -45,8 +47,6 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
//void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
//void  _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);

// arena.c
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
@@ -59,22 +59,23 @@ static bool mi_delay_remove(mi_delay_slot_t* slots, size_t count, void* p, size_

// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE    (256 * (1ULL << 30))  // 256GiB => 16KiB for the region map
#define MI_HEAP_REGION_MAX_SIZE    (256 * GiB)  // 40KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE    (3 * (1UL << 30))  // 3GiB => 196 bytes for the region map
#define MI_HEAP_REGION_MAX_SIZE    (3 * GiB)  // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif

#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE

#define MI_REGION_MAP_BITS        (MI_INTPTR_SIZE * 8)
#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS)
#define MI_REGION_MAX_ALLOC_SIZE  ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE)  // 64MiB
#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)
#define MI_REGION_MAP_FULL        UINTPTR_MAX

#define MI_REGION_MAX_BLOCKS      MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS  (MI_REGION_MAX_BLOCKS/4)                    // 64MiB
#define MI_REGION_MAX_OBJ_SIZE    (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)

// Region info is a pointer to the memory region and two bits for
// its flags: is_large, and is_committed.
typedef uintptr_t mi_region_info_t;

static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
@@ -91,19 +92,18 @@ static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, b
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
volatile _Atomic(uintptr_t)        map;        // in-use bit per MI_SEGMENT_SIZE block
volatile _Atomic(mi_region_info_t) info;       // start of virtual memory area, and flags
volatile _Atomic(uintptr_t)        dirty_mask; // bit per block if the contents are not zero'd
volatile _Atomic(mi_region_info_t) info;       // start of the memory area (and flags)
volatile _Atomic(uintptr_t)        numa_node;  // associated numa node + 1 (so 0 is no association)
mi_bitmap_field_t                  in_use;
mi_bitmap_field_t                  dirty;
size_t                             arena_memid; // if allocated from a (huge page) arena
} mem_region_t;

// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// TODO: in the future, maintain a map per NUMA node for numa aware allocation
// The region map
static mem_region_t regions[MI_REGION_MAX];

static volatile _Atomic(uintptr_t) regions_count; // = 0;  // allocated regions
// Allocated regions
static volatile _Atomic(uintptr_t) regions_count; // = 0;

/* ----------------------------------------------------------------------------
@@ -112,14 +112,7 @@ Utility functions

// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE);
return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
}

// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
return ((((uintptr_t)1 << blocks) - 1) << bitidx);
return _mi_divide_up(size, MI_SEGMENT_SIZE);
}

// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
@@ -140,8 +133,11 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
}

static size_t mi_memid_create(size_t idx, size_t bitidx) {
return ((idx*MI_REGION_MAP_BITS) + bitidx)<<1;
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
size_t idx = region - regions;
mi_assert_internal(&regions[idx] == region);
return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}

static size_t mi_memid_create_from_arena(size_t arena_memid) {
@@ -152,317 +148,185 @@ static bool mi_memid_is_arena(size_t id) {
return ((id&1)==1);
}

static bool mi_memid_indices(size_t id, size_t* idx, size_t* bitidx, size_t* arena_memid) {
static bool mi_memid_indices(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
if (mi_memid_is_arena(id)) {
*arena_memid = (id>>1);
return true;
}
else {
*idx = ((id>>1) / MI_REGION_MAP_BITS);
*bitidx = ((id>>1) % MI_REGION_MAP_BITS);
size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
*bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
*region = &regions[idx];
return false;
}
}
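For illustration only: the region memid scheme above. Bit 0 distinguishes arena ids from region ids; for regions the remaining bits hold field*MI_BITMAP_FIELD_BITS + bit:

#include <assert.h>
#include <stddef.h>

#define FIELD_BITS 64  // MI_BITMAP_FIELD_BITS on a 64-bit build

static size_t region_memid(size_t region_idx, size_t bit_idx) {
  return (region_idx*FIELD_BITS + bit_idx) << 1;               // even: region id
}
static size_t arena_id(size_t arena_memid) { return (arena_memid << 1) | 1; }  // odd: arena id

int main(void) {
  size_t id = region_memid(5, 17);
  assert((id & 1) == 0);                  // recognized as a region id
  assert((id >> 1) / FIELD_BITS == 5);    // back to region index 5
  assert((id >> 1) % FIELD_BITS == 17);   // and bit 17 within it
  assert((arena_id(42) & 1) == 1);        // arena ids stay odd
  return 0;
}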
|
||||
|
||||
/* ----------------------------------------------------------------------------
|
||||
Commit from a region
|
||||
Allocate a region is allocated from the OS (or an arena)
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`.
|
||||
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
|
||||
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
|
||||
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
|
||||
static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks,
|
||||
size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
|
||||
static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
|
||||
{
|
||||
size_t mask = mi_region_block_mask(blocks,bitidx);
|
||||
mi_assert_internal(mask != 0);
|
||||
mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask);
|
||||
mi_assert_internal(®ions[idx] == region);
|
||||
// not out of regions yet?
|
||||
if (mi_atomic_read_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false;
|
||||
|
||||
// ensure the region is reserved
|
||||
mi_region_info_t info = mi_atomic_read(®ion->info);
|
||||
if (info == 0)
|
||||
{
|
||||
bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit);
|
||||
bool region_large = *allow_large;
|
||||
// try to allocate a fresh region from the OS
|
||||
bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
|
||||
bool region_large = (commit && allow_large);
|
||||
bool is_zero = false;
|
||||
size_t arena_memid = 0;
|
||||
void* start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, is_zero, &arena_memid, tld);
|
||||
/*
|
||||
void* start = NULL;
|
||||
if (region_large) {
|
||||
start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN);
|
||||
if (start != NULL) { region_commit = true; }
|
||||
}
|
||||
if (start == NULL) {
|
||||
start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld);
|
||||
}
|
||||
*/
|
||||
mi_assert_internal(!(region_large && !*allow_large));
|
||||
void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_zero, &arena_memid, tld);
|
||||
if (start == NULL) return false;
|
||||
mi_assert_internal(!(region_large && !allow_large));
|
||||
|
||||
if (start == NULL) {
// failure to allocate from the OS! unclaim the blocks and fail
size_t map;
do {
map = mi_atomic_read_relaxed(&region->map);
} while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
// claim a fresh slot
const uintptr_t idx = mi_atomic_increment(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement(&regions_count);
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
return false;
}

// set the newly allocated region
info = mi_region_info_create(start,region_large,region_commit);
if (mi_atomic_cas_strong(&region->info, info, 0)) {
// update the region count
region->arena_memid = arena_memid;
mi_atomic_write(&region->numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
}
else {
// failed, another thread allocated just before us!
// we assign it to a later slot instead (up to 4 tries).
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
regions[idx+i].arena_memid = arena_memid;
mi_atomic_write(&regions[idx+i].numa_node, _mi_os_numa_node(tld) + 1);
mi_atomic_increment(&regions_count);
start = NULL;
break;
}
}
if (start != NULL) {
// free it if we didn't succeed to save it to some other region
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
// _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
}
// and continue with the memory at our index
info = mi_atomic_read(&region->info);
}
}
mi_assert_internal(info == mi_atomic_read(&region->info));
mi_assert_internal(info != 0);

// Commit the blocks to memory
bool region_is_committed = false;
bool region_is_large = false;
void* start = mi_region_info_read(info,&region_is_large,&region_is_committed);
mi_assert_internal(!(region_is_large && !*allow_large));
mi_assert_internal(start!=NULL);

// set dirty bits
uintptr_t m;
do {
m = mi_atomic_read(&region->dirty_mask);
} while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m));
*is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range?

void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
if (*commit && !region_is_committed) {
// ensure commit
bool commit_zero = false;
_mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages)
if (commit_zero) *is_zero = true;
}
else if (!*commit && region_is_committed) {
// but even when no commit is requested, we might have committed anyway (in a huge OS page for example)
*commit = true;
}

// and return the allocation
mi_assert_internal(blocks_start != NULL);
*allow_large = region_is_large;
*p = blocks_start;
*id = mi_memid_create(idx, bitidx);
// allocated, initialize and claim the initial blocks
mem_region_t* r = &regions[idx];
r->numa_node = _mi_os_numa_node(tld) + 1;
r->arena_memid = arena_memid;
*bit_idx = 0;
mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_write(&r->info, mi_region_info_create(start, region_large, region_commit)); // now make it available to others
*region = r;
return true;
}

// Use bit scan forward to quickly find the first zero bit if it is available
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanForward64(&idx, x);
#else
_BitScanForward(&idx, x);
#endif
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanReverse64(&idx, x);
#else
_BitScanReverse(&idx, x);
#endif
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#define MI_HAVE_BITSCAN
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x));
}
#endif
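Side note for reviewers: the scan loop further below uses `mi_bsf(~map)` to jump straight to the first zero bit of an occupancy word. A minimal standalone sketch of that idiom (illustration only, not part of this commit; the demo `main`, constants, and helper name are made up):

#include <stdint.h>
#include <stdio.h>

// find the lowest zero bit of a 64-bit occupancy map, as mi_bsf(~map) does
static inline size_t first_zero_bit(uint64_t map) {
  uint64_t inv = ~map;
  return (inv == 0 ? 64 : (size_t)__builtin_ctzll(inv));  // GCC/Clang builtin
}

int main(void) {
  uint64_t map = 0x0000000000000FFFULL;                     // lowest 12 blocks already claimed
  printf("first free block: %zu\n", first_zero_bit(map));   // prints 12
  return 0;
}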
/* ----------------------------------------------------------------------------
Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/

// Allocate `blocks` in a `region` at `idx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
mi_assert_internal(p != NULL && id != NULL);
mi_assert_internal(blocks < MI_REGION_MAP_BITS);

const uintptr_t mask = mi_region_block_mask(blocks, 0);
const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
uintptr_t map = mi_atomic_read(&region->map);
if (map==MI_REGION_MAP_FULL) return true;

#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx

// scan linearly for a free range of zero bits
while(bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(&region->map, newmap, map)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
map = mi_atomic_read(&region->map);
continue;
}
else {
// success, we claimed the bits
// now commit the block memory -- this can still fail
return mi_region_commit_blocks(region, idx, bitidx, blocks,
size, commit, allow_large, is_zero, p, id, tld);
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= blocks);
#else
size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no error, but also no bits found
return true;
}

// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_try_alloc_blocks(int numa_node, size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero,
void** p, size_t* id, mi_os_tld_t* tld)
{
// check if there are available blocks in the region..
mi_assert_internal(idx < MI_REGION_MAX);
mem_region_t* region = &regions[idx];
uintptr_t m = mi_atomic_read_relaxed(&region->map);
int rnode = ((int)mi_atomic_read_relaxed(&region->numa_node)) - 1;
if ((rnode < 0 || rnode == numa_node) && // fits current numa node
(m != MI_REGION_MAP_FULL)) // and some bits are zero
{
bool ok = (*commit || *allow_large); // committing or allow-large is always ok
if (!ok) {
// otherwise skip incompatible regions if possible.
// this is not guaranteed due to multiple threads allocating at the same time but
// that's ok. In secure mode, large is never allowed for any thread, so that works out;
// otherwise we might just not be able to reset/decommit individual pages sometimes.
static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool commit, bool allow_large ) {
// initialized at all?
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
if (info==0) return false;

// numa correct
if (numa_node >= 0) { // use negative numa node to always succeed
int rnode = ((int)mi_atomic_read_relaxed(&region->numa_node)) - 1;
if (rnode >= 0 && rnode != numa_node) return false;
}

// note: we also skip if commit is false and the region is committed,
// that is a bit strong but prevents allocation of eager-delayed segments in an eagerly committed region
bool is_large;
bool is_committed;
void* start = mi_region_info_read(info,&is_large,&is_committed);
ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation?
}
if (ok) {
return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld);
}
}
return true; // no error, but no success either
mi_region_info_read(info, &is_large, &is_committed);

if (!commit && is_committed) return false;
if (!allow_large && is_large) return false;
return true;
}


static bool mi_region_try_claim(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// try all regions for a free slot
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
const size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx];
if (mi_region_is_suitable(r, numa_node, commit, allow_large)) {
if (mi_bitmap_try_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
return true;
}
}
}
return false;
}

static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
mem_region_t* region;
mi_bitmap_index_t bit_idx;
// first try to claim in existing regions
if (!mi_region_try_claim(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}

// found a region and claimed `blocks` at `bit_idx`
mi_assert_internal(region != NULL);
mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));

mi_region_info_t info = mi_atomic_read(&region->info);
bool region_is_committed = false;
bool region_is_large = false;
void* start = mi_region_info_read(info, &region_is_large, &region_is_committed);
mi_assert_internal(!(region_is_large && !*is_large));
mi_assert_internal(start != NULL);

bool any_zero = false;
*is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, &any_zero);
if (!mi_option_is_enabled(mi_option_eager_commit)) any_zero = true; // if no eager commit, even dirty segments may be partially committed
*is_large = region_is_large;
*memid = mi_memid_create(region, bit_idx);
void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
if (*commit && !region_is_committed && any_zero) { // want to commit, but not yet fully committed?
// ensure commit
_mi_os_commit(p, blocks * MI_SEGMENT_SIZE, is_zero, tld->stats);
}
else {
*commit = region_is_committed || !any_zero;
}


// and return the allocation
mi_assert_internal(p != NULL);
return p;
}


/* ----------------------------------------------------------------------------
Allocation
-----------------------------------------------------------------------------*/

// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero,
size_t* id, mi_os_tld_t* tld)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(id != NULL && tld != NULL);
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*id = 0;
*memid = 0;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`

// use direct OS allocation for huge blocks or alignment
if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
size_t arena_memid = 0;
void* p = _mi_arena_alloc_aligned(mi_good_commit_size(size), alignment, commit, large, is_zero, &arena_memid, tld); // round up size
*id = mi_memid_create_from_arena(arena_memid);
return p;
}

// always round size to OS page size multiple (so commit/decommit go over the entire range)
// TODO: use large OS page size here?
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());

// calculate the number of needed blocks
size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE);

// find a range of free blocks
int numa_node = _mi_os_numa_node(tld);
void* p = NULL;
size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
// allocate from regions if possible
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL) {
if (*commit) { ((uint8_t*)p)[0] = 0; }
return p;
}

if (p == NULL) {
// no free range in existing regions -- try to extend beyond the count.. but at most 8 regions
for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) {
if (!mi_region_try_alloc_blocks(numa_node, idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
}

if (p == NULL) {
// we could not find a place to allocate, fall back to the os directly
_mi_warning_message("unable to allocate from region: size %zu\n", size);
size_t arena_memid = 0;
p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*id = mi_memid_create_from_arena(arena_memid);
}
else {
tld->region_idx = idx; // next start of search? currently not used as we use first-fit
}

// and otherwise fall back to the OS
void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
return p;
}
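The comment above notes that the memory `id` is abstract but effectively encodes `idx*MI_REGION_MAP_BITS + bitidx`. A tiny round-trip sketch of that encoding (illustration only; the helper names, the constant, and the `main` below are hypothetical and not the commit's actual `mi_memid_create`/`mi_memid_indices`):

#include <assert.h>
#include <stddef.h>

#define REGION_MAP_BITS 64   // assumed number of blocks per region map word

// pack a (region index, bit index) pair into one abstract id and unpack it again
static size_t memid_create(size_t idx, size_t bitidx) {
  return idx * REGION_MAP_BITS + bitidx;
}

static void memid_indices(size_t id, size_t* idx, size_t* bitidx) {
  *idx    = id / REGION_MAP_BITS;
  *bitidx = id % REGION_MAP_BITS;
}

int main(void) {
  size_t idx, bitidx;
  memid_indices(memid_create(3, 17), &idx, &bitidx);
  assert(idx == 3 && bitidx == 17);   // the pair round-trips losslessly
  return 0;
}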

@ -481,32 +345,28 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, p, size);

size_t arena_memid = 0;
size_t idx = 0;
size_t bitidx = 0;
if (mi_memid_indices(id,&idx,&bitidx,&arena_memid)) {
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_indices(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, arena_memid, tld->stats);
}
else {
// allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return;
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
// we can align the size up to page size (as we allocate that way too)
// this ensures we fully commit/decommit/reset
size = _mi_align_up(size, _mi_os_page_size());
size_t blocks = mi_region_block_count(size);
size_t mask = mi_region_block_mask(blocks, bitidx);
mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
mem_region_t* region = &regions[idx];
mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
const size_t blocks = mi_region_block_count(size);
mi_region_info_t info = mi_atomic_read(&region->info);
bool is_large;
bool is_eager_committed;
void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
mi_assert_internal(start != NULL);
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
void* blocks_start = (uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE);
mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS);
if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`?
mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?

// decommit (or reset) the blocks to reduce the working set.
// TODO: implement delayed decommit/reset as these calls are too expensive
@ -532,12 +392,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
// this frees up virtual address space which might be useful on 32-bit systems?

// and unclaim
uintptr_t map;
uintptr_t newmap;
do {
map = mi_atomic_read_relaxed(&region->map);
newmap = map & ~mask;
} while (!mi_atomic_cas_weak(&region->map, newmap, map));
mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
}
}

@ -547,25 +402,25 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_os_tld_t* tld) {
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use.
for (size_t i = 0; i < regions_count; i++) {
uintptr_t rcount = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = &regions[i];
if (mi_atomic_read_relaxed(&region->map) == 0) {
if (mi_atomic_read_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region
uintptr_t m;
do {
m = mi_atomic_read_relaxed(&region->map);
} while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 ));
m = mi_atomic_read_relaxed(&region->in_use);
} while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
if (m == 0) {
// on success, free the whole region
bool is_eager_committed;
void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed);
void* start = mi_region_info_read(mi_atomic_read(&regions[i].info), NULL, &is_eager_committed);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
mi_delay_remove(tld->reset_delay, MI_RESET_DELAY_SLOTS, start, MI_REGION_SIZE);
_mi_arena_free(start, MI_REGION_SIZE, region->arena_memid, tld->stats);
}
// and release
mi_atomic_write(&region->info,0);
mi_atomic_write(&region->map,0);
}
}
}

src/os.c
@ -940,6 +940,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
_mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);

// check for timeout
if (max_msecs > 0) {
mi_msecs_t elapsed = _mi_clock_end(start_t);
if (page >= 1) {
mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
@ -952,6 +953,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
break;
}
}
}
mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
if (pages_reserved != NULL) *pages_reserved = page;
if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE;
@ -1046,9 +1048,10 @@ int _mi_os_numa_node_count(void) {

int _mi_os_numa_node(mi_os_tld_t* tld) {
UNUSED(tld);
int numa_node = mi_os_numa_nodex();
// never more than the node count and >= 0
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
int numa_node = mi_os_numa_nodex();
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
if (numa_node < 0) numa_node = 0;
return numa_node;

src/page.c
@ -439,15 +439,15 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)

static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) {
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) {
UNUSED(stats);
#if (MI_SECURE<=2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
size_t bsize = page->block_size;
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
const size_t bsize = page->block_size;

// initialize a randomized free list
// set up `slice_count` slices to alternate between
@ -455,8 +455,8 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
while ((extend >> shift) == 0) {
shift--;
}
size_t slice_count = (size_t)1U << shift;
size_t slice_extend = extend / slice_count;
const size_t slice_count = (size_t)1U << shift;
const size_t slice_extend = extend / slice_count;
mi_assert_internal(slice_extend >= 1);
mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
size_t counts[MI_MAX_SLICES]; // available objects in the slice
@ -470,12 +470,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
counts[current]--;
page->free = blocks[current];
mi_block_t* const free_start = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
size_t round = i%MI_INTPTR_SIZE;
const size_t round = i%MI_INTPTR_SIZE;
if (round == 0) rnd = _mi_random_shuffle(rnd);
// select a random next slice index
size_t next = ((rnd >> 8*round) & (slice_count-1));
@ -485,34 +485,39 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
}
// and link the current block to it
counts[next]--;
mi_block_t* block = blocks[current];
mi_block_t* const block = blocks[current];
blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
current = next;
}
mi_block_set_next(page, blocks[current], NULL); // end of the list
// prepend to the free list (usually NULL)
mi_block_set_next(page, blocks[current], page->free); // end of the list
page->free = free_start;
heap->random = _mi_random_shuffle(rnd);
}
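To make the shuffling above easier to follow, here is a small self-contained sketch of the same slice-interleaving idea (illustration only: block indices stand in for real block pointers, `rand()` stands in for the heap RNG, and the slice and block counts are made up):

#include <stdio.h>
#include <stdlib.h>

#define SLICES 4
#define BLOCKS_PER_SLICE 4
#define NBLOCKS (SLICES * BLOCKS_PER_SLICE)

int main(void) {
  int next[NBLOCKS];              // next[i]: block that follows block i in the free list
  int head[SLICES], left[SLICES]; // per slice: next unused block, and blocks remaining
  for (int s = 0; s < SLICES; s++) { head[s] = s * BLOCKS_PER_SLICE; left[s] = BLOCKS_PER_SLICE; }

  int current = rand() % SLICES;        // random starting slice
  int free_head = head[current];        // first block of the list
  left[current]--;

  for (int i = 1; i < NBLOCKS; i++) {
    int n = rand() % SLICES;                    // pick a random next slice...
    while (left[n] == 0) n = (n + 1) % SLICES;  // ...skipping exhausted ones
    left[n]--;
    int block = head[current];
    head[current] = block + 1;          // bump within the current slice
    next[block] = head[n];              // link this block to the chosen slice's head
    current = n;
  }
  next[head[current]] = -1;             // last block terminates the list

  for (int b = free_head; b != -1; b = next[b]) printf("%d ", b);  // shuffled order
  printf("\n");
  return 0;
}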

static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats)
{
UNUSED(stats);
#if (MI_SECURE <= 2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;
mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
const size_t bsize = page->block_size;
mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity);

// initialize a sequential free list
mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
while(block <= last) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
mi_block_set_next(page, last, NULL);
// prepend to free list (usually `NULL`)
mi_block_set_next(page, last, page->free);
page->free = start;
}

@ -17,8 +17,8 @@ terms of the MIT license.
#include <mimalloc.h>

// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int N = 20; // scaling factor
static int THREADS = 8; // more repeatable if THREADS <= #processors
static int N = 200; // scaling factor

// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int N = 100; // scaling factor
@ -63,10 +63,16 @@ static bool chance(size_t perc, random_t r) {
}

static void* alloc_items(size_t items, random_t r) {
if (chance(1, r)) items *= 100; // 1% huge objects;
if (chance(1, r)) {
if (chance(1, r)) items *= 1000; // 0.01% giant
else if (chance(10, r)) items *= 100; // 0.1% huge
else items *= 10; // 1% large objects;
}
if (items==40) items++; // pthreads uses that size for stack increases
uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
}
return p;
}
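As a quick sanity check on the comments in the new `alloc_items`: assuming `chance(perc, r)` succeeds with probability `perc`/100, the nested branches work out to roughly 0.01% giant, about 0.1% huge, and just under 1% large allocations. A throwaway calculation (illustration only, not part of the test itself):

#include <stdio.h>

int main(void) {
  double p_enter = 0.01;                          // chance(1): 1% of calls get scaled up
  double p_giant = p_enter * 0.01;                // nested chance(1): *1000
  double p_huge  = p_enter * (1 - 0.01) * 0.10;   // else chance(10): *100
  double p_large = p_enter - p_giant - p_huge;    // remaining: *10
  printf("giant %.4f%%  huge %.4f%%  large %.4f%%\n",
         100 * p_giant, 100 * p_huge, 100 * p_large);
  return 0;
}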