merge from dev

commit 4a27ea1643
@@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake")
 set(mi_sources
     src/stats.c
+    src/random.c
     src/os.c
     src/arena.c
     src/segment.c
@@ -114,7 +115,7 @@ endif()

 # extra needed libraries
 if(WIN32)
-  list(APPEND mi_libraries psapi shell32 user32)
+  list(APPEND mi_libraries psapi shell32 user32 bcrypt)
 else()
   list(APPEND mi_libraries pthread)
   find_library(LIBRT rt)
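The new `bcrypt` dependency on Windows presumably serves the freshly added `src/random.c` (whose contents are not shown in this diff), which would draw OS entropy through the BCrypt API; treating that as an assumption, this link line is the only CMake change besides registering the new source file.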
@@ -13,16 +13,31 @@ jobs:
   pool:
     vmImage:
       windows-2019
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+      Secure:
+        BuildType: secure
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
-      workingDirectory: 'build'
-      cmakeArgs: ..
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
   - task: MSBuild@1
     inputs:
-      solution: build/libmimalloc.sln
-  - upload: $(Build.SourcesDirectory)/build
-    artifact: windows
+      solution: $(BuildType)/libmimalloc.sln
+  - script: |
+      cd $(BuildType)
+      ctest
+    displayName: CTest
+  - upload: $(Build.SourcesDirectory)/$(BuildType)
+    artifact: mimalloc-windows-$(BuildType)

 - job:
   displayName: Linux
@@ -61,32 +76,42 @@ jobs:
       CXX: clang++
       BuildType: secure-clang
       cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON

   steps:
   - task: CMake@1
     inputs:
       workingDirectory: $(BuildType)
       cmakeArgs: .. $(cmakeExtraArgs)

   - script: make -j$(nproc) -C $(BuildType)
     displayName: Make

   - script: make test -C $(BuildType)
-    displayName: Ctest
+    displayName: CTest
   - upload: $(Build.SourcesDirectory)/$(BuildType)
-    artifact: ubuntu-$(BuildType)
+    artifact: mimalloc-ubuntu-$(BuildType)

 - job:
   displayName: macOS
   pool:
     vmImage:
       macOS-10.14
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+      Secure:
+        BuildType: secure
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
-      workingDirectory: 'build'
-      cmakeArgs: ..
-  - script: make -j$(sysctl -n hw.ncpu) -C build
-  - upload: $(Build.SourcesDirectory)/build
-    artifact: macos
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
+    displayName: Make
+  - script: make test -C $(BuildType)
+    displayName: CTest
+  - upload: $(Build.SourcesDirectory)/$(BuildType)
+    artifact: mimalloc-macos-$(BuildType)
@@ -129,7 +129,7 @@
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <IgnoreSpecificDefaultLibraries>
       </IgnoreSpecificDefaultLibraries>
       <ModuleDefinitionFile>
@@ -195,7 +195,7 @@
     <Link>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <ModuleDefinitionFile>
       </ModuleDefinitionFile>
       <LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
@@ -243,6 +243,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\stats.c" />
   </ItemGroup>
@@ -70,5 +70,8 @@
     <ClCompile Include="..\..\src\arena.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>
@@ -228,6 +228,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\os.c" />
     <ClCompile Include="..\..\src\stats.c" />
@@ -56,6 +56,9 @@
     <ClCompile Include="..\..\src\arena.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
@@ -246,6 +246,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\stats.c" />
   </ItemGroup>
@@ -43,6 +43,9 @@
     <ClCompile Include="..\..\src\bitmap.inc.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
@@ -231,6 +231,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\os.c" />
     <ClCompile Include="..\..\src\stats.c" />
@@ -46,6 +46,9 @@
     <ClCompile Include="..\..\src\bitmap.inc.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
@@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file

 #include "mimalloc-types.h"

-#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__))
+#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
 #define MI_TLS_RECURSE_GUARD
 #endif

@@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...);
 void       _mi_options_init(void);
 void       _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn;

-// "init.c"
+// random.c
+void       _mi_random_init(mi_random_ctx_t* ctx);
+void       _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
+uintptr_t  _mi_random_next(mi_random_ctx_t* ctx);
+uintptr_t  _mi_heap_random_next(mi_heap_t* heap);
+static inline uintptr_t _mi_random_shuffle(uintptr_t x);
+
+// init.c
 extern mi_stats_t _mi_stats_main;
 extern const mi_page_t _mi_page_empty;
 bool       _mi_is_main_thread(void);
-uintptr_t  _mi_random_shuffle(uintptr_t x);
-uintptr_t  _mi_random_init(uintptr_t seed /* can be zero */);
 bool       _mi_preloading();  // true while the C runtime is not ready

 // os.c
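A rough sketch of how this new interface composes (illustrative only; these are internal `_mi_` functions, and the authoritative call sites appear later in `heap.c` and `init.c`):

    // inside some initialization path (sketch):
    mi_random_ctx_t ctx;
    _mi_random_init(&ctx);                         // seed the context (from OS entropy, presumably)
    uintptr_t cookie = _mi_random_next(&ctx) | 1;  // e.g. derive a non-zero cookie
    mi_random_ctx_t child;
    _mi_random_split(&ctx, &child);                // fork an independent stream for a new heap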
@@ -86,8 +91,9 @@ void _mi_page_unfull(mi_page_t* page);
 void       _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force);   // free the page
 void       _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq);            // abandon the page, to be picked up by another thread...
 void       _mi_heap_delayed_free(mi_heap_t* heap);
+void       _mi_heap_collect_retired(mi_heap_t* heap, bool force);

-void       _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay);
+void       _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
 size_t     _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
 void       _mi_deferred_free(mi_heap_t* heap, bool force);

@@ -101,7 +107,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i
 // "heap.c"
 void       _mi_heap_destroy_pages(mi_heap_t* heap);
 void       _mi_heap_collect_abandon(mi_heap_t* heap);
-uintptr_t  _mi_heap_random(mi_heap_t* heap);
 void       _mi_heap_set_default_direct(mi_heap_t* heap);

 // "stats.c"
@@ -236,7 +241,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate

 static inline mi_heap_t* mi_get_default_heap(void) {
 #ifdef MI_TLS_RECURSE_GUARD
-  // on some platforms, like macOS, the dynamic loader calls `malloc`
+  // on some BSD platforms, like macOS, the dynamic loader calls `malloc`
   // to initialize thread local data. To avoid recursion, we need to avoid
   // accessing the thread local `_mi_default_heap` until our module is loaded
   // and use the statically allocated main heap until that time.
@@ -406,12 +411,30 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
 }


-// -------------------------------------------------------------------
-// Encoding/Decoding the free list next pointers
-// Note: we pass a `null` value to be used as the `NULL` value for the
-// end of a free list. This is to prevent the cookie itself to ever
-// be present among user blocks (as `cookie^0==cookie`).
-// -------------------------------------------------------------------
+/* -------------------------------------------------------------------
+  Encoding/Decoding the free list next pointers
+
+  This is to protect against buffer overflow exploits where the
+  free list is mutated. Many hardened allocators xor the next pointer `p`
+  with a secret key `k1`, as `p^k1`. This prevents overwriting with known
+  values but might be still too weak: if the attacker can guess
+  the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
+  Moreover, if multiple blocks can be read as well, the attacker can
+  xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
+  about the pointers (and subsequently `k1`).
+
+  Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
+  Since these operations are not associative, the above approaches do not
+  work so well any more even if the `p` can be guesstimated. For example,
+  for the read case we can subtract two entries to discard the `+k1` term,
+  but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
+  We include the left-rotation since xor and addition are otherwise linear
+  in the lowest bit. Finally, both keys are unique per page which reduces
+  the re-use of keys by a large factor.
+
+  We also pass a separate `null` value to be used as `NULL` or otherwise
+  `(k2<<<k1)+k1` would appear (too) often as a sentinel value.
+------------------------------------------------------------------- */

 static inline bool mi_is_in_same_segment(const void* p, const void* q) {
   return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
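As a standalone sketch of the scheme described in the comment above (example keys and pointer value are made up; `rotl`/`rotr` mirror the `mi_rotl`/`mi_rotr` helpers added in the next hunk):

    #include <assert.h>
    #include <stdint.h>

    #define BITS (8*sizeof(uintptr_t))

    static uintptr_t rotl(uintptr_t x, uintptr_t n) { n %= BITS; return (x << n) | (x >> (BITS - n)); }
    static uintptr_t rotr(uintptr_t x, uintptr_t n) { n %= BITS; return (x >> n) | (x << (BITS - n)); }

    int main(void) {
      uintptr_t k1 = (uintptr_t)0x9E3779B97F4A7C15ULL;  // per-page key 1 (example value)
      uintptr_t k2 = (uintptr_t)0xD1B54A32D192ED03ULL;  // per-page key 2 (example value)
      uintptr_t p  = (uintptr_t)0x00007F4A1C2D3000ULL;  // a plausible block pointer
      uintptr_t enc = rotl(p ^ k2, k1) + k1;            // encode: ((p^k2)<<<k1)+k1
      uintptr_t dec = rotr(enc - k1, k1) ^ k2;          // decode reverses each step
      assert(dec == p);
      return 0;
    }

Note that the rotation amount is reduced modulo the word size, so only the low bits of `k1` pick the rotate distance; the full `k1` still enters through the final addition.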
@@ -423,52 +446,84 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) {
   return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
 }

-static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) {
+static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return ((x << shift) | (x >> (MI_INTPTR_BITS - shift)));
+}
+static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
+}
+
+static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
+  mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2);
   if (mi_unlikely((void*)b==null)) { b = NULL; }
   return b;
 #else
-  UNUSED(cookie); UNUSED(null);
+  UNUSED(key1); UNUSED(key2); UNUSED(null);
   return (mi_block_t*)block->next;
 #endif
 }

-static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
+static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) {
 #ifdef MI_ENCODE_FREELIST
   if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
-  block->next = (mi_encoded_t)next ^ cookie;
+  block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1;
 #else
-  UNUSED(cookie); UNUSED(null);
+  UNUSED(key1); UNUSED(key2); UNUSED(null);
   block->next = (mi_encoded_t)next;
 #endif
 }

 static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* next = mi_block_nextx(page,block,page->cookie);
-  // check for free list corruption: is `next` at least in our segment range?
+  mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]);
+  // check for free list corruption: is `next` at least in the same page?
   // TODO: check if `next` is `page->block_size` aligned?
-  if (next!=NULL && !mi_is_in_same_page(block, next)) {
+  if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
     _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
     next = NULL;
   }
   return next;
 #else
   UNUSED(page);
-  return mi_block_nextx(page,block,0);
+  return mi_block_nextx(page,block,0,0);
 #endif
 }

 static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_set_nextx(page,block,next, page->cookie);
+  mi_block_set_nextx(page,block,next, page->key[0], page->key[1]);
 #else
   UNUSED(page);
-  mi_block_set_nextx(page,block, next,0);
+  mi_block_set_nextx(page,block, next,0,0);
 #endif
 }

+// -------------------------------------------------------------------
+// Fast "random" shuffle
+// -------------------------------------------------------------------
+
+static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
+  if (x==0) { x = 17; }   // ensure we don't get stuck in generating zeros
+#if (MI_INTPTR_SIZE==8)
+  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
+  x ^= x >> 30;
+  x *= 0xbf58476d1ce4e5b9UL;
+  x ^= x >> 27;
+  x *= 0x94d049bb133111ebUL;
+  x ^= x >> 31;
+#elif (MI_INTPTR_SIZE==4)
+  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
+  x ^= x >> 16;
+  x *= 0x7feb352dUL;
+  x ^= x >> 15;
+  x *= 0x846ca68bUL;
+  x ^= x >> 16;
+#endif
+  return x;
+}

 // -------------------------------------------------------------------
 // Optimize numa node access for the common case (= one node)
@@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file

 // Encoded free lists allow detection of corrupted free lists
 // and can detect buffer overflows and double `free`s.
 #if (MI_SECURE>=3 || MI_DEBUG>=1)
 #define MI_ENCODE_FREELIST  1
 #endif

@@ -76,7 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif

 #define MI_INTPTR_SIZE  (1<<MI_INTPTR_SHIFT)
-#define MI_INTPTR_BITS  (8*MI_INTPTR_SIZE)
+#define MI_INTPTR_BITS  (MI_INTPTR_SIZE*8)

 #define KiB     ((size_t)1024)
 #define MiB     (KiB*KiB)
@@ -112,6 +112,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_LARGE_OBJ_SIZE_MAX     (MI_SEGMENT_SIZE/2)   // 32mb on 64-bit
 #define MI_LARGE_OBJ_WSIZE_MAX    (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)

+#define MI_HUGE_OBJ_SIZE_MAX      (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE)  // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
+
 // Minimal alignment necessary. On most platforms 16 bytes are needed
 // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
 #define MI_MAX_ALIGN_SIZE  16   // sizeof(max_align_t)
@@ -145,14 +147,14 @@ typedef enum mi_delayed_e {
 } mi_delayed_t;


 // The `in_full` and `has_aligned` page flags are put in a union to efficiently
 // test if both are false (`full_aligned == 0`) in the `mi_free` routine.
 typedef union mi_page_flags_s {
   uint8_t full_aligned;
   struct {
     uint8_t in_full : 1;
     uint8_t has_aligned : 1;
   } x;
 } mi_page_flags_t;

 // Thread free list.
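The union comment above is worth a tiny standalone illustration (a sketch, not part of the header itself):

    #include <stdint.h>
    #include <stdio.h>

    typedef union page_flags_u {
      uint8_t full_aligned;
      struct { uint8_t in_full : 1; uint8_t has_aligned : 1; } x;
    } page_flags_t;

    int main(void) {
      page_flags_t f = { 0 };
      printf("fast path? %d\n", f.full_aligned == 0);  // 1: neither flag set
      f.x.has_aligned = 1;
      printf("fast path? %d\n", f.full_aligned == 0);  // 0: one compare catches either flag
      return 0;
    }

Both bit-fields live in the same byte as `full_aligned`, so `mi_free` can test the two conditions with a single load and compare.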
@@ -189,11 +191,12 @@ typedef struct mi_page_s {
   uint16_t              capacity;          // number of blocks committed, must be the first field, see `segment.c:page_clear`
   uint16_t              reserved;          // number of blocks reserved in memory
   mi_page_flags_t       flags;             // `in_full` and `has_aligned` flags (8 bits)
-  bool                  is_zero;           // `true` if the blocks in the free list are zero initialized
+  uint8_t               is_zero:1;         // `true` if the blocks in the free list are zero initialized
+  uint8_t               retire_expire:7;   // expiration count for retired blocks

   mi_block_t*           free;              // list of available free blocks (`malloc` allocates from this list)
 #ifdef MI_ENCODE_FREELIST
-  uintptr_t             cookie;            // random cookie to encode the free lists
+  uintptr_t             key[2];            // two random keys to encode the free lists (see `_mi_block_next`)
 #endif
   size_t                used;              // number of blocks in use (including blocks in `local_free` and `thread_free`)
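A note on the bit-field change above: `is_zero:1` and `retire_expire:7` share a single byte, so the new expiration counter costs no extra space over the old `bool is_zero`; consistent with that, the padding comment in the following hunk drops from 11 to 10 words on 64-bit.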
@@ -208,9 +211,9 @@ typedef struct mi_page_s {
   struct mi_page_s*     prev;              // previous page owned by this thread with the same `block_size`

   // improve page index calculation
-  // without padding: 11 words on 64-bit, 13 on 32-bit.
-#ifndef MI_ENCODE_FREELIST
-  void*                 padding[1];        // 12 words on 64-bit, 14 words on 32-bit
+  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words
+#if (MI_INTPTR_SIZE==4)
+  void*                 padding[1];        // 12/14 words on 32-bit plain
 #endif
 } mi_page_t;

@@ -253,8 +256,8 @@ typedef struct mi_segment_s {
   uintptr_t         commit_mask;

   // from here is zero initialized
-  struct mi_segment_s* next;   // the list of freed segments in the cache
-  volatile _Atomic(struct mi_segment_s*) abandoned_next;
+  struct mi_segment_s* next;   // the list of freed segments in the cache
+  struct mi_segment_s* abandoned_next;

   size_t            abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t            used;      // count of pages in use
@@ -296,6 +299,14 @@ typedef struct mi_page_queue_s {

 #define MI_BIN_FULL  (MI_BIN_HUGE+1)

+// Random context
+typedef struct mi_random_cxt_s {
+  uint32_t input[16];
+  uint32_t output[16];
+  int      output_available;
+} mi_random_ctx_t;
+
+
 // A heap owns a set of pages.
 struct mi_heap_s {
   mi_tld_t*             tld;
@@ -303,8 +314,9 @@ struct mi_heap_s {
   mi_page_queue_t       pages[MI_BIN_FULL + 1];  // queue of pages for each size class (or "bin")
   volatile _Atomic(mi_block_t*) thread_delayed_free;
   uintptr_t             thread_id;               // thread this heap belongs to
-  uintptr_t             cookie;
-  uintptr_t             random;                  // random number used for secure allocation
+  uintptr_t             cookie;                  // random cookie to verify pointers (see `_mi_ptr_cookie`)
+  uintptr_t             key[2];                  // two random keys used to encode the `thread_delayed_free` list
+  mi_random_ctx_t       random;                  // random number context used for secure allocation
   size_t                page_count;              // total number of pages in the `pages` queues.
   bool                  no_reclaim;              // `true` if this heap should not reclaim abandoned pages
 };
src/alloc.c (45 changed lines)

@@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept {


 // ------------------------------------------------------
 // Check for double free in secure and debug mode
 // This is somewhat expensive so only enabled for secure mode 4
 // ------------------------------------------------------

@@ -139,32 +139,29 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons
   return false;
 }

-static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) {
-  size_t psize;
-  uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
-  if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) {
-    // Suspicious: the decoded value is in the same page (or NULL).
-    // Walk the free lists to verify positively if it is already freed
-    if (mi_list_contains(page, page->free, block) ||
-        mi_list_contains(page, page->local_free, block) ||
-        mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block))
-    {
-      _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
-      return true;
-    }
+static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
+  // The decoded value is in the same page (or NULL).
+  // Walk the free lists to verify positively if it is already freed
+  mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free));
+  if (mi_list_contains(page, page->free, block) ||
+      mi_list_contains(page, page->local_free, block) ||
+      mi_list_contains(page, mi_tf_block(tf), block))
+  {
+    _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
+    return true;
   }
   return false;
 }

 static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
-  mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field
-  if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 &&  // quick check: aligned pointer?
-      (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL?
-  {
-    // Suspicious: decoded value in block is in the same segment (or NULL) -- maybe a double free?
+  mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field
+  if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 &&  // quick check: aligned pointer?
+      (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
+  {
+    // Suspicious: decoded value in block is in the same page (or NULL) -- maybe a double free?
     // (continue in separate function to improve code generation)
-    return mi_check_is_double_freex(page, block, n);
+    return mi_check_is_double_freex(page, block);
   }
   return false;
 }
 #else
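The two-stage structure is the point of this refactor: the inline quick check decodes the block's first word and only treats it as a potential double free when it looks like a plausible free-list link (word-aligned and inside the same page, or NULL); random user data rarely survives both tests, so the noinline slow path that walks `free`, `local_free`, and `thread_free` runs only for genuinely suspicious blocks.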
@@ -237,7 +234,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
     mi_block_t* dfree;
     do {
       dfree = (mi_block_t*)heap->thread_delayed_free;
-      mi_block_set_nextx(heap,block,dfree, heap->cookie);
+      mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]);
     } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
   }

@@ -261,7 +258,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
   // and push it on the free list
   if (mi_likely(local)) {
     // owning thread can free a block directly
-    if (mi_check_is_double_free(page, block)) return;
+    if (mi_unlikely(mi_check_is_double_free(page, block))) return;
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
@@ -336,7 +333,7 @@ void mi_free(void* p) mi_attr_noexcept
   if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
     mi_block_t* block = (mi_block_t*)p;
-    if (mi_check_is_double_free(page,block)) return;
+    if (mi_unlikely(mi_check_is_double_free(page,block))) return;
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
src/arena.c (63 changed lines)

@@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file

 /* ----------------------------------------------------------------------------
   "Arenas" are fixed areas of OS memory from which we can allocate
   large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
   In contrast to the rest of mimalloc, the arenas are shared between
   threads and need to be accessed using atomic operations.

   Currently arenas are only used for huge OS page (1GiB) reservations,
   otherwise it delegates to direct allocation from the OS.
   In the future, we can expose an API to manually add more kinds of arenas
   which is sometimes needed for embedded devices or shared memory for example.
   (We can also employ this with WASI or `sbrk` systems to reserve large arenas
   on demand and be able to reuse them efficiently).
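For orientation, a hedged usage sketch of the reservation API that appears later in this file; the exact fallback for `numa_nodes == 0` is not shown in this diff and is assumed to be the detected node count:

    #include <mimalloc.h>

    int main(void) {
      // reserve 4 one-GiB huge-OS-page arenas, spread over all NUMA nodes,
      // with a total timeout budget of ~2000 ms (per the signature shown below)
      int err = mi_reserve_huge_os_pages_interleave(4, 0, 2000);
      return err;  // 0 on success, ENOMEM if the reservation failed
    }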
@@ -41,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
 void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
 void  _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);

 bool  _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);


 /* -----------------------------------------------------------
@@ -87,13 +87,13 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0
 // Use `0` as a special id for direct OS allocated memory.
 #define MI_MEMID_OS   0

-static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
+static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
   mi_assert_internal(arena_index < 0xFE);
   mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
   return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
 }

-static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
+static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
   mi_assert_internal(memid != MI_MEMID_OS);
   *arena_index = (memid & 0xFF) - 1;
   *bitmap_index = (memid >> 8);
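A small worked example of the id packing above (values chosen for illustration): with `arena_index == 2` and `bitmap_index == 5`, `mi_arena_id_create` yields `(5 << 8) | 3 == 0x0503`; unpacking takes `0x0503 & 0xFF == 3`, minus 1 gives arena 2 back, and `0x0503 >> 8 == 5` recovers the bitmap index. The `+1` on the arena index keeps every encoded id distinct from `MI_MEMID_OS` (0), even for arena 0 at bitmap index 0.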
@@ -106,7 +106,7 @@ static size_t mi_block_count_of_size(size_t size) {
 /* -----------------------------------------------------------
   Thread safe allocation in an arena
 ----------------------------------------------------------- */
 static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
 {
   const size_t fcount = arena->field_count;
   size_t idx = mi_atomic_read(&arena->search_idx);  // start from last search
@@ -261,15 +261,15 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
   Arena Allocation
 ----------------------------------------------------------- */

 static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
                                  bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
 {
   mi_bitmap_index_t bitmap_index;
   if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;

   // claimed it! set the dirty bits (todo: no need for an atomic op here?)
   void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
-  *memid = mi_memid_create(arena_index, bitmap_index);
+  *memid = mi_arena_id_create(arena_index, bitmap_index);
   *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
   *large = arena->is_large;
   if (arena->is_committed) {
@@ -293,23 +293,23 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
   return p;
 }

 void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
                               bool* commit, bool* large, bool* is_zero,
                               size_t* memid, mi_os_tld_t* tld)
 {
   mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL);
   mi_assert_internal(size > 0);
   *memid = MI_MEMID_OS;
   *is_zero = false;

   bool default_large = false;
   if (large==NULL) large = &default_large;  // ensure `large != NULL`

   const int numa_node = _mi_os_numa_node(tld); // current numa node

   // try to allocate in an arena if the alignment is small enough
   // and the object is not too large or too small.
   if (alignment <= MI_SEGMENT_ALIGN &&
       size <= MI_ARENA_MAX_OBJ_SIZE &&
       size >= MI_ARENA_MIN_OBJ_SIZE)
   {
     const size_t bcount = mi_block_count_of_size(size);
@@ -321,7 +321,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
       if (arena==NULL) break; // end reached
       if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
           (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
       {
         void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
         mi_assert_internal((uintptr_t)p % alignment == 0);
         if (p != NULL) return p;
@@ -376,7 +376,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
     // allocated in an arena
     size_t arena_idx;
     size_t bitmap_idx;
-    mi_memid_indices(memid, &arena_idx, &bitmap_idx);
+    mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
     mi_assert_internal(arena_idx < MI_MAX_ARENAS);
     mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
     mi_assert_internal(arena != NULL);
@@ -406,7 +406,7 @@ static bool mi_arena_add(mi_arena_t* arena) {
   mi_assert_internal(arena != NULL);
   mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
   mi_assert_internal(arena->block_count > 0);

   uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
   if (i >= MI_MAX_ARENAS) {
     mi_atomic_subu(&mi_arena_count, 1);
@@ -434,11 +434,11 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
     _mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
     return ENOMEM;
   }
-  _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);
+  _mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages);

   size_t bcount = mi_block_count_of_size(hsize);
-  size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
+  size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
   size_t asize  = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
   mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
   if (arena == NULL) {
     _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
@@ -446,23 +446,24 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
   }
   arena->block_count = bcount;
   arena->field_count = fields;
   arena->start = (uint8_t*)p;
   arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
   arena->is_large = true;
   arena->is_zero_init = true;
   arena->is_committed = true;
   arena->search_idx = 0;
-  arena->blocks_dirty = &arena->blocks_inuse[bcount];
+  arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
   arena->blocks_committed = NULL;
   // the bitmaps are already zero initialized due to os_alloc
   // just claim leftover blocks if needed
-  size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
+  ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
+  mi_assert_internal(post >= 0);
   if (post > 0) {
     // don't use leftover bits at the end
     mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
     mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
   }

   mi_arena_add(arena);
   return 0;
 }
@@ -477,8 +478,8 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
   if (numa_count <= 0) numa_count = 1;
   const size_t pages_per = pages / numa_count;
   const size_t pages_mod = pages % numa_count;
-  const size_t timeout_per = (timeout_msecs / numa_count) + 50;
+  const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);

   // reserve evenly among numa nodes
   for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
     size_t node_pages = pages_per; // can be 0
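A worked example of the even split: reserving `pages = 10` across `numa_count = 4` nodes gives `pages_per = 2` and `pages_mod = 2`, so two nodes get an extra page (3+3+2+2); with `timeout_msecs = 2000` each node gets `2000/4 + 50 = 550` ms, while a zero timeout now stays zero (no timeout) instead of becoming a spurious 50 ms per node as in the old expression.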
@@ -500,7 +501,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
   UNUSED(max_secs);
   _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
   if (pages_reserved != NULL) *pages_reserved = 0;
   int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
   if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
   return err;
 }
src/heap.c (27 changed lines)

@@ -45,8 +45,8 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
 }


-#if MI_DEBUG>=3
-static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
+#if MI_DEBUG>=2
+static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
   UNUSED(arg1);
   UNUSED(arg2);
   UNUSED(pq);
@@ -59,7 +59,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page

 static bool mi_heap_is_valid(mi_heap_t* heap) {
   mi_assert_internal(heap!=NULL);
-  mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL);
+  mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);
   return true;
 }
 #endif
@@ -84,6 +84,7 @@ typedef enum mi_collect_e {
 static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
   UNUSED(arg2);
   UNUSED(heap);
+  mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
   mi_collect_t collect = *((mi_collect_t*)arg_collect);
   _mi_page_free_collect(page, collect >= ABANDON);
   if (mi_page_all_free(page)) {
@@ -102,7 +103,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
   UNUSED(arg2);
   UNUSED(heap);
   UNUSED(pq);
-  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE);
+  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
   return true; // don't break
 }

@@ -184,12 +185,6 @@ mi_heap_t* mi_heap_get_backing(void) {
   return bheap;
 }

-uintptr_t _mi_heap_random(mi_heap_t* heap) {
-  uintptr_t r = heap->random;
-  heap->random = _mi_random_shuffle(r);
-  return r;
-}
-
 mi_heap_t* mi_heap_new(void) {
   mi_heap_t* bheap = mi_heap_get_backing();
   mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);
@@ -197,12 +192,18 @@ mi_heap_t* mi_heap_new(void) {
   memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
   heap->tld = bheap->tld;
   heap->thread_id = _mi_thread_id();
-  heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1;
-  heap->random = _mi_heap_random(bheap);
+  _mi_random_split(&bheap->random, &heap->random);
+  heap->cookie = _mi_heap_random_next(heap) | 1;
+  heap->key[0] = _mi_heap_random_next(heap);
+  heap->key[1] = _mi_heap_random_next(heap);
   heap->no_reclaim = true;  // don't reclaim abandoned pages or otherwise destroy is unsafe
   return heap;
 }

+uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
+  return _mi_random_next(&heap->random);
+}
+
 // zero out the page queues
 static void mi_heap_reset_pages(mi_heap_t* heap) {
   mi_assert_internal(mi_heap_is_initialized(heap));
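The replacement of `_mi_heap_random` is the heart of this change: rather than deriving fresh values by shuffling a single word carried in the heap, `_mi_random_split` forks the backing heap's random context into the new heap, and the cookie plus both free-list keys are then drawn from that per-heap stream via `_mi_heap_random_next` (defined just below); the intent is that values observed in one heap do not expose those of another.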
@@ -241,7 +242,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
   UNUSED(pq);

   // ensure no more thread_delayed_free will be added
-  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE);
+  _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);

   // stats
   if (page->block_size > MI_MEDIUM_OBJ_SIZE_MAX) {
src/init.c (121 changed lines)

@@ -12,17 +12,21 @@ terms of the MIT license. A copy of the license can be found in the file

 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
-  0, false, false, false, false, 0, 0,
-  { 0 }, false,
+  0, false, false, false, false,
+  0,       // capacity
+  0,       // reserved capacity
+  { 0 },   // flags
+  false,   // is_zero
+  0,       // retire_expire
   NULL,    // free
   #if MI_ENCODE_FREELIST
-  0,
+  { 0, 0 },
   #endif
   0,       // used
   NULL,
   ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0),
   0, NULL, NULL, NULL
-  #ifndef MI_ENCODE_FREELIST
+  #if (MI_INTPTR_SIZE==4)
   , { NULL } // padding
   #endif
 };
@@ -95,10 +99,11 @@ const mi_heap_t _mi_heap_empty = {
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY,
   ATOMIC_VAR_INIT(NULL),
-  0,
-  0,
-  0,
-  0,
+  0,                // tid
+  0,                // cookie
+  { 0, 0 },         // keys
+  { {0}, {0}, 0 },
+  0,                // page count
   false
 };

@@ -130,86 +135,29 @@ static mi_tld_t tld_main = {
   { MI_STATS_NULL }       // stats
 };

+#if MI_INTPTR_SIZE==8
+#define MI_INIT_COOKIE  (0xCDCDCDCDCDCDCDCDUL)
+#else
+#define MI_INIT_COOKIE  (0xCDCDCDCDUL)
+#endif
+
 mi_heap_t _mi_heap_main = {
   &tld_main,
   MI_SMALL_PAGES_EMPTY,
   MI_PAGE_QUEUES_EMPTY,
   NULL,
-  0,         // thread id
-#if MI_INTPTR_SIZE==8   // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!)
-  0xCDCDCDCDCDCDCDCDUL,
-#else
-  0xCDCDCDCDUL,
-#endif
-  0,         // random
-  0,         // page count
-  false      // can reclaim
+  0,                // thread id
+  MI_INIT_COOKIE,   // initial cookie
+  { MI_INIT_COOKIE, MI_INIT_COOKIE },  // the key of the main heap can be fixed (unlike page keys that need to be secure!)
+  { {0}, {0}, 0 },  // random
+  0,                // page count
+  false             // can reclaim
 };

 bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.

 mi_stats_t _mi_stats_main = { MI_STATS_NULL };

-/* -----------------------------------------------------------
-  Initialization of random numbers
------------------------------------------------------------ */
-
-#if defined(_WIN32)
-#include <windows.h>
-#elif defined(__APPLE__)
-#include <mach/mach_time.h>
-#else
-#include <time.h>
-#endif
-
-uintptr_t _mi_random_shuffle(uintptr_t x) {
-  #if (MI_INTPTR_SIZE==8)
-  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
-  x ^= x >> 30;
-  x *= 0xbf58476d1ce4e5b9UL;
-  x ^= x >> 27;
-  x *= 0x94d049bb133111ebUL;
-  x ^= x >> 31;
-  #elif (MI_INTPTR_SIZE==4)
-  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
-  x ^= x >> 16;
-  x *= 0x7feb352dUL;
-  x ^= x >> 15;
-  x *= 0x846ca68bUL;
-  x ^= x >> 16;
-  #endif
-  return x;
-}
-
-uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
-#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse
-  uintptr_t x;
-  arc4random_buf(&x, sizeof x);
-#else
-  // Hopefully, ASLR makes our function address random
-  uintptr_t x = (uintptr_t)((void*)&_mi_random_init);
-  x ^= seed;
-  // xor with high res time
-#if defined(_WIN32)
-  LARGE_INTEGER pcount;
-  QueryPerformanceCounter(&pcount);
-  x ^= (uintptr_t)(pcount.QuadPart);
-#elif defined(__APPLE__)
-  x ^= (uintptr_t)mach_absolute_time();
-#else
-  struct timespec time;
-  clock_gettime(CLOCK_MONOTONIC, &time);
-  x ^= (uintptr_t)time.tv_sec;
-  x ^= (uintptr_t)time.tv_nsec;
-#endif
-  // and do a few randomization steps
-  uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
-  for (uintptr_t i = 0; i < max; i++) {
-    x = _mi_random_shuffle(x);
-  }
-#endif
-  return x;
-}
-
 /* -----------------------------------------------------------
   Initialization and freeing of the thread local heaps
@@ -217,7 +165,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {

 typedef struct mi_thread_data_s {
   mi_heap_t  heap;   // must come first due to cast in `_mi_heap_done`
   mi_tld_t   tld;
 } mi_thread_data_t;

 // Initialize the thread local default heap, called from `mi_thread_init`
@@ -240,8 +188,10 @@ static bool _mi_heap_init(void) {
     memcpy(tld, &tld_empty, sizeof(*tld));
     memcpy(heap, &_mi_heap_empty, sizeof(*heap));
     heap->thread_id = _mi_thread_id();
-    heap->random = _mi_random_init(heap->thread_id);
-    heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1;
+    _mi_random_init(&heap->random);
+    heap->cookie = _mi_heap_random_next(heap) | 1;
+    heap->key[0] = _mi_heap_random_next(heap);
+    heap->key[1] = _mi_heap_random_next(heap);
     heap->tld = tld;
     tld->heap_backing = heap;
     tld->segments.stats = &tld->stats;
@@ -476,16 +426,17 @@ void mi_process_init(void) mi_attr_noexcept {
   // access _mi_heap_default before setting _mi_process_is_initialized to ensure
   // that the TLS slot is allocated without getting into recursion on macOS
   // when using dynamic linking with interpose.
-  mi_heap_t* h = mi_get_default_heap();
+  mi_get_default_heap();
   _mi_process_is_initialized = true;

   _mi_heap_main.thread_id = _mi_thread_id();
   _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
-  uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h;
-  #ifndef __APPLE__
-  _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random;
+  _mi_random_init(&_mi_heap_main.random);
+  #ifndef __APPLE__  // TODO: fix this? cannot update cookie if allocation already happened..
+  _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
+  _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
+  _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
   #endif
-  _mi_heap_main.random = _mi_random_shuffle(random);
   mi_process_setup_auto_thread_done();
   _mi_os_init();
   #if (MI_DEBUG)
@@ -79,7 +79,7 @@ typedef union mi_region_info_u {
   struct {
     bool  valid;
     bool  is_large;
-    int   numa_node;
+    short numa_node;
   };
 } mi_region_info_t;

@@ -308,7 +308,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
   if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
     // some blocks are still reset
     mi_assert_internal(!info.is_large);
-    mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit);
+    mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
     mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
     if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
       bool reset_zero = false;
@@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept {

 // --------------------------------------------------------
 // Options
 // These can be accessed by multiple threads and may be
 // concurrently initialized, but an initializing data race
 // is ok since they resolve to the same value.
 // --------------------------------------------------------
@@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(eager_region_commit) },
   { 1, UNINIT, MI_OPTION(reset_decommits) },      // reset decommits memory
 #else
   { 1, UNINIT, MI_OPTION(eager_region_commit) },
   { 0, UNINIT, MI_OPTION(reset_decommits) },      // reset uses MADV_FREE/MADV_DONTNEED
 #endif
   { 0, UNINIT, MI_OPTION(large_os_pages) },       // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
@@ -91,7 +91,7 @@ void _mi_options_init(void) {
       mi_option_desc_t* desc = &options[option];
       _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
     }
   }
   mi_max_error_count = mi_option_get(mi_option_max_errors);
 }
@@ -100,7 +100,7 @@ long mi_option_get(mi_option_t option) {
   mi_option_desc_t* desc = &options[option];
   mi_assert(desc->option == option); // index should match the option
   if (mi_unlikely(desc->init == UNINIT)) {
     mi_option_init(desc);
   }
   return desc->value;
 }
@@ -144,7 +144,7 @@ void mi_option_disable(mi_option_t option) {

 static void mi_out_stderr(const char* msg) {
   #ifdef _WIN32
   // on windows with redirection, the C runtime cannot handle locale dependent output
   // after the main thread closes so we use direct console output.
   if (!_mi_preloading()) { _cputs(msg); }
   #else
@@ -186,7 +186,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) {
   out_buf[count] = 0;
   out(out_buf);
   if (!no_more_buf) {
     out_buf[count] = '\n'; // if continue with the buffer, insert a newline
   }
 }

@@ -342,7 +342,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
 #include <windows.h>
 static bool mi_getenv(const char* name, char* result, size_t result_size) {
   result[0] = 0;
   size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
   return (len > 0 && len < result_size);
 }
 #else
@@ -368,7 +368,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
   }
 }
 #endif
 static void mi_option_init(mi_option_desc_t* desc) {
+  #ifndef _WIN32
+  // cannot call getenv() when still initializing the C runtime.
+  if (_mi_preloading()) return;
+  #endif
   // Read option value from the environment
   char buf[64+1];
   mi_strlcpy(buf, "mimalloc_", sizeof(buf));
src/os.c (28 changed lines)

@@ -165,9 +165,7 @@ void _mi_os_init() {
     os_page_size = (size_t)result;
     os_alloc_granularity = os_page_size;
   }
-  if (mi_option_is_enabled(mi_option_large_os_pages)) {
-    large_os_page_size = 2*MiB;
-  }
+  large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
 }
 #endif

@@ -332,7 +330,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
       mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
     }
     else {
-      int lflags = flags;
+      int lflags = flags & ~MAP_NORESERVE;  // using NORESERVE on huge pages seems to fail on Linux
       int lfd = fd;
       #ifdef MAP_ALIGNED_SUPER
       lflags |= MAP_ALIGNED_SUPER;
@@ -408,8 +406,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages)
     intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area
     #if (MI_SECURE>0 || MI_DEBUG==0)     // security: randomize start of aligned allocations unless in debug mode
-    uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint);
-    init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF));   // (randomly 0-64k)*4MiB == 0 to 256GiB
+    uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
+    init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF));  // (randomly 20 bits)*4MiB == 0 to 4TiB
     #endif
     mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size);
     hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
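The widened mask is easy to sanity-check: `(r>>17) & 0xFFFFF` keeps 20 random bits, and 2^20 x MI_SEGMENT_SIZE (4MiB) = 4TiB of slide, up from 2^16 x 4MiB = 256GiB with the old `0xFFFF` mask.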
@@ -597,6 +595,18 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
   return mi_os_page_align_areax(true, addr, size, newsize);
 }

+static void mi_mprotect_hint(int err) {
+#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page
+  if (err == ENOMEM) {
+    _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n"
+                        "  On Linux this is controlled by the vm.max_map_count. For example:\n"
+                        "  > sudo sysctl -w vm.max_map_count=262144\n");
+  }
+#else
+  UNUSED(err);
+#endif
+}
+
 // Commit/Decommit memory.
 // Usually commit is aligned liberal, while decommit is aligned conservative.
 // (but not for the reset version where we want commit to be conservative as well)
@@ -645,6 +655,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
   #endif
   if (err != 0) {
     _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
+    mi_mprotect_hint(err);
   }
   mi_assert_internal(err == 0);
   return (err == 0);
@@ -763,6 +774,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   #endif
   if (err != 0) {
     _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err);
+    mi_mprotect_hint(err);
   }
   return (err == 0);
 }
@@ -908,8 +920,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
     // Initialize the start address after the 32TiB area
     start = ((uintptr_t)32 << 40); // 32TiB virtual start address
     #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
-    uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages);
-    start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF));   // (randomly 0-1024)*1GiB == 0 to 1TiB
+    uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
+    start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (randomly 12bits)*1GiB == between 0 to 4TiB
     #endif
   }
   end = start + size;
107
src/page.c
107
src/page.c
@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
bool _mi_page_is_valid(mi_page_t* page) {
  mi_assert_internal(mi_page_is_valid_init(page));
#if MI_SECURE
  mi_assert_internal(page->cookie != 0);
  mi_assert_internal(page->key != 0);
#endif
  if (page->heap!=NULL) {
    mi_segment_t* segment = _mi_page_segment(page);
@ -119,26 +119,27 @@ bool _mi_page_is_valid(mi_page_t* page) {
}
#endif


void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
  mi_thread_free_t tfree;
  mi_thread_free_t tfreex;

  mi_delayed_t old_delay;
  do {
    tfreex = tfree = page->thread_free;
    if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) {
      tfreex = mi_tf_set_delayed(tfree,delay);
    }
    else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) {
    tfree = mi_atomic_read_relaxed(&page->thread_free);
    tfreex = mi_tf_set_delayed(tfree, delay);
    old_delay = mi_tf_delayed(tfree);
    if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
      mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
      continue; // and try again
    }
  }
  while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal
    !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree));
    else if (delay == old_delay) {
      break; // avoid atomic operation if already equal
    }
    else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
      break; // leave never set
    }
  } while ((old_delay == MI_DELAYED_FREEING) ||
    !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree));
}
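Note how the new `override_never` parameter is used at the call sites later in this diff: ordinary transitions pass `false` so a page pinned at MI_NEVER_DELAYED_FREE stays pinned, and only `_mi_page_reclaim` passes `true`. A single-threaded model of just that decision (a sketch; it assumes mimalloc's enum ordering and leaves the MI_DELAYED_FREEING retry/yield to the loop above):

#include <stdbool.h>

typedef enum { MI_USE_DELAYED_FREE, MI_DELAYED_FREEING,
               MI_NO_DELAYED_FREE, MI_NEVER_DELAYED_FREE } mi_delayed_t;

// model of the per-iteration decision in _mi_page_use_delayed_free:
// returns the delay state the page ends up with.
static mi_delayed_t set_delayed(mi_delayed_t old_delay, mi_delayed_t delay, bool override_never) {
  if (delay == old_delay) return old_delay;                 // avoid the CAS if already equal
  if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) return old_delay; // leave never set
  return delay;                                             // otherwise transition to `delay`
}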
/* -----------------------------------------------------------
  Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
@ -231,10 +232,13 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {

  mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
  mi_assert_internal(!page->is_reset);
  mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE);

  _mi_page_free_collect(page,false);
  mi_page_queue_t* pq = mi_page_queue(heap, page->block_size);
  mi_page_queue_push(heap, pq, page);
  mi_assert_internal(page->heap != NULL);
  _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set)
  mi_assert_expensive(_mi_page_is_valid(page));
}
@ -286,7 +290,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {

  // and free them all
  while(block != NULL) {
    mi_block_t* next = mi_block_nextx(heap,block, heap->cookie);
    mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]);
    // use internal free instead of regular one to keep stats etc correct
    if (!_mi_free_delayed_block(block)) {
      // we might already start delayed freeing while another thread has not yet
@ -294,9 +298,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
      mi_block_t* dfree;
      do {
        dfree = (mi_block_t*)heap->thread_delayed_free;
        mi_block_set_nextx(heap, block, dfree, heap->cookie);
        mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]);
      } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
    }
    block = next;
  }
@ -312,7 +315,7 @@ void _mi_page_unfull(mi_page_t* page) {
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(mi_page_is_in_full(page));

  _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE);
  _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false);
  if (!mi_page_is_in_full(page)) return;

  mi_heap_t* heap = page->heap;
@ -328,7 +331,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
  mi_assert_internal(!mi_page_immediate_available(page));
  mi_assert_internal(!mi_page_is_in_full(page));

  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE);
  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
  if (mi_page_is_in_full(page)) return;

  mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
@ -345,7 +348,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
  mi_assert_expensive(_mi_page_is_valid(page));
  mi_assert_internal(pq == mi_page_queue_of(page));
  mi_assert_internal(page->heap != NULL);


#if MI_DEBUG > 1
  mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
#endif
@ -359,7 +362,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {

#if MI_DEBUG>1
  // check there are no references left..
  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) {
  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) {
    mi_assert_internal(_mi_ptr_page(block) != page);
  }
#endif
@ -394,7 +397,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
      _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
    }
  }


  // remove from the page list
  // (no need to do _mi_heap_delayed_free first as all blocks are already free)
  mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
@ -422,19 +425,39 @@ void _mi_page_retire(mi_page_t* page) {
  // (or we end up retiring and re-allocating most of the time)
  // NOTE: refine this more: we should not retire if this
  // is the only page left with free blocks. It is not clear
  // how to check this efficiently though...
  // how to check this efficiently though...
  // for now, we don't retire if it is the only page left of this size class.
  mi_page_queue_t* pq = mi_page_queue_of(page);
  if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) {
    // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
    if (pq->last==page && pq->first==page) {
  if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
    if (pq->last==page && pq->first==page) { // the only page in the queue?
      mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
      return; // don't retire after all
      page->retire_expire = 4;
      mi_assert_internal(mi_page_all_free(page));
      return; // don't free after all
    }
  }
  _mi_page_free(page, pq, false);
}

// free retired pages: we don't need to look at the entire queues
// since we only retire pages that are the last one in a queue.
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
  for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) {
    mi_page_t* page = pq->first;
    if (page != NULL && page->retire_expire != 0) {
      if (mi_page_all_free(page)) {
        page->retire_expire--;
        if (force || page->retire_expire == 0) {
          _mi_page_free(pq->first, pq, force);
        }
      }
      else {
        page->retire_expire = 0;
      }
    }
  }
}
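The new `retire_expire` field turns retirement into a countdown: a fully-free page that is the only one in its queue survives a few collection rounds before it is actually freed, avoiding retire/re-allocate churn. A standalone sketch of the policy (hypothetical `page_t`, not the mimalloc types):

#include <stdbool.h>

typedef struct page_s {
  int  retire_expire;  // >0: scheduled for delayed retirement
  bool all_free;       // stand-in for mi_page_all_free(page)
} page_t;

// instead of freeing the last page of a size class immediately,
// give it a grace period of a few collection rounds
static void page_retire(page_t* page) {
  page->retire_expire = 4;
}

// called once per collection (cf. _mi_heap_collect_retired above);
// returns true when the page should now really be freed
static bool page_collect_retired(page_t* page, bool force) {
  if (page->retire_expire == 0) return false;
  if (!page->all_free) { page->retire_expire = 0; return false; }  // reused: cancel
  page->retire_expire--;
  return (force || page->retire_expire == 0);
}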
/* -----------------------------------------------------------
  Initialize the initial free list in a page.
----------------------------------------------------------- */
@ -475,11 +498,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co

  // and initialize the free list by randomly threading through them
  // set up first element
  size_t current = _mi_heap_random(heap) % slice_count;
  const uintptr_t r = _mi_heap_random_next(heap);
  size_t current = r % slice_count;
  counts[current]--;
  mi_block_t* const free_start = blocks[current];
  // and iterate through the rest
  uintptr_t rnd = heap->random;
  // and iterate through the rest; use `random_shuffle` for performance
  uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0
  for (size_t i = 1; i < extend; i++) {
    // call random_shuffle only every INTPTR_SIZE rounds
    const size_t round = i%MI_INTPTR_SIZE;
@ -500,7 +524,6 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
  // prepend to the free list (usually NULL)
  mi_block_set_next(page, blocks[current], page->free); // end of the list
  page->free = free_start;
  heap->random = _mi_random_shuffle(rnd);
}
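For reference, the secure free-list build threads blocks through the page in a randomized slice order, refreshing the random bits via a shuffle step for speed. A condensed standalone sketch of the idea (hypothetical names; a multiply-xorshift mix stands in for `_mi_random_shuffle`, and it assumes `slices` divides `n` and `slices <= 8`):

#include <stdint.h>
#include <stddef.h>

typedef struct block_s { struct block_s* next; } block_t;

// stand-in for _mi_random_shuffle: any cheap mixing function works here
static uintptr_t shuffle(uintptr_t x) {
  x ^= x >> 15; x *= (uintptr_t)0x9E3779B1u; x ^= x >> 16;
  return x;
}

// thread `n` blocks of `bsize` bytes at `area` into one free list, drawing
// each block from a randomly chosen slice (cf. mi_page_free_list_extend_secure).
static block_t* free_list_randomize(uint8_t* area, size_t n, size_t bsize,
                                    size_t slices, uintptr_t rnd) {
  size_t used[8] = {0};      // blocks consumed per slice (slices <= 8)
  size_t per = n / slices;   // assumes slices divides n
  block_t* head = NULL;
  for (size_t i = 0; i < n; i++) {
    rnd = shuffle(rnd);
    size_t s = rnd % slices;
    while (used[s] >= per) { s = (s + 1) % slices; }  // skip exhausted slices
    block_t* b = (block_t*)(area + (s*per + used[s])*bsize);
    used[s]++;
    b->next = head;          // prepend, like mi_block_set_next above
    head = b;
  }
  return head;
}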
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats)
@ -514,15 +537,15 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
  void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
  const size_t bsize = page->block_size;
  mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity);


  // initialize a sequential free list
  mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
  mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
  mi_block_t* block = start;
  while(block <= last) {
    mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
    mi_block_set_next(page,block,next);
    block = next;
  }
}
  // prepend to free list (usually `NULL`)
  mi_block_set_next(page, last, page->free);
  page->free = start;
@ -607,7 +630,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
  mi_assert_internal(page_size / block_size < (1L<<16));
  page->reserved = (uint16_t)(page_size / block_size);
#ifdef MI_ENCODE_FREELIST
  page->cookie = _mi_heap_random(heap) | 1;
  page->key[0] = _mi_heap_random_next(heap);
  page->key[1] = _mi_heap_random_next(heap);
#endif
  page->is_zero = page->is_zero_init;

@ -618,9 +642,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
  mi_assert_internal(page->thread_freed == 0);
  mi_assert_internal(page->next == NULL);
  mi_assert_internal(page->prev == NULL);
  mi_assert_internal(page->retire_expire == 0);
  mi_assert_internal(!mi_page_has_aligned(page));
#if (MI_ENCODE_FREELIST)
  mi_assert_internal(page->cookie != 0);
  mi_assert_internal(page->key != 0);
#endif
  mi_assert_expensive(mi_page_is_valid_init(page));
@ -698,8 +723,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
  }
  else {
    mi_assert(pq->first == page);
    page->retire_expire = 0;
  }
  mi_assert_internal(page == NULL || mi_page_immediate_available(page));

  // finally collect retired pages
  _mi_heap_collect_retired(heap,false);
  return page;
}

@ -709,7 +738,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
  mi_page_queue_t* pq = mi_page_queue(heap,size);
  mi_page_t* page = pq->first;
  if (page != NULL) {
    if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) {
    if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
      // in secure mode, we extend half the time to increase randomness
      mi_page_extend_free(heap, page, heap->tld);
      mi_assert_internal(mi_page_immediate_available(page));
@ -718,6 +747,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
      _mi_page_free_collect(page,false);
    }
    if (mi_page_immediate_available(page)) {
      page->retire_expire = 0;
      return page; // fast path
    }
  }
@ -757,7 +787,6 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
// Because huge pages contain just one block, and the segment contains
// just that page, we always treat them as abandoned and any thread
// that frees the block can free the whole page and segment directly.

static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
  size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t);
  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
@ -786,7 +815,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
      _mi_stat_increase(&heap->tld->stats.huge, block_size);
      _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
    }
  }
}
  return page;
}
src/random.c (new file)
@ -0,0 +1,328 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"

#include <string.h> // memset

/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
and to avoid implementations that use a lock. We only use the OS provided
random source to initialize the initial seeds. Since we do not need ultimate
performance but we do rely on the security (for secret cookies in secure mode)
we use a cryptographically secure generator (chacha20).
-----------------------------------------------------------------------------*/

#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?


/* ----------------------------------------------------------------------------
Chacha20 implementation as the original algorithm with a 64-bit nonce
and counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position  0 to  3: the constant.
Position  4 to 11: the key.
Position 12 to 13: the counter.
Position 14 to 15: the nonce.

The implementation uses regular C code which compiles very well on modern compilers.
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
  return (x << shift) | (x >> (32 - shift));
}

static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
  x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
  x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
  x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
  x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}

static void chacha_block(mi_random_ctx_t* ctx)
{
  // scramble into `x`
  uint32_t x[16];
  for (size_t i = 0; i < 16; i++) {
    x[i] = ctx->input[i];
  }
  for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
    qround(x, 0, 4,  8, 12);
    qround(x, 1, 5,  9, 13);
    qround(x, 2, 6, 10, 14);
    qround(x, 3, 7, 11, 15);
    qround(x, 0, 5, 10, 15);
    qround(x, 1, 6, 11, 12);
    qround(x, 2, 7,  8, 13);
    qround(x, 3, 4,  9, 14);
  }

  // add scrambled data to the initial state
  for (size_t i = 0; i < 16; i++) {
    ctx->output[i] = x[i] + ctx->input[i];
  }
  ctx->output_available = 16;

  // increment the counter for the next round
  ctx->input[12] += 1;
  if (ctx->input[12] == 0) {
    ctx->input[13] += 1;
    if (ctx->input[13] == 0) { // and keep increasing into the nonce
      ctx->input[14] += 1;
    }
  }
}

static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
  if (ctx->output_available <= 0) {
    chacha_block(ctx);
    ctx->output_available = 16; // (assign again to suppress static analysis warning)
  }
  const uint32_t x = ctx->output[16 - ctx->output_available];
  ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
  ctx->output_available--;
  return x;
}
static inline uint32_t read32(const uint8_t* p, size_t idx32) {
  const size_t i = 4*idx32;
  return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}

static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
  // since we only use chacha for randomness (and not encryption) we
  // do not _need_ to read 32-bit values as little endian, but we do anyway
  // just to be compatible :-)
  memset(ctx, 0, sizeof(*ctx));
  for (size_t i = 0; i < 4; i++) {
    const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
    ctx->input[i] = read32(sigma,i);
  }
  for (size_t i = 0; i < 8; i++) {
    ctx->input[i + 4] = read32(key,i);
  }
  ctx->input[12] = 0;
  ctx->input[13] = 0;
  ctx->input[14] = (uint32_t)nonce;
  ctx->input[15] = (uint32_t)(nonce >> 32);
}

static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
  memset(ctx_new, 0, sizeof(*ctx_new));
  memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
  ctx_new->input[12] = 0;
  ctx_new->input[13] = 0;
  ctx_new->input[14] = (uint32_t)nonce;
  ctx_new->input[15] = (uint32_t)(nonce >> 32);
  mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
  chacha_block(ctx_new);
}
/* ----------------------------------------------------------------------------
Random interface
-----------------------------------------------------------------------------*/

#if MI_DEBUG>1
static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
  return (ctx != NULL && ctx->input[0] != 0);
}
#endif

void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
  mi_assert_internal(mi_random_is_initialized(ctx));
  mi_assert_internal(ctx != ctx_new);
  chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
}

uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
  mi_assert_internal(mi_random_is_initialized(ctx));
#if MI_INTPTR_SIZE <= 4
  return chacha_next32(ctx);
#elif MI_INTPTR_SIZE == 8
  return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
#else
# error "define mi_random_next for this platform"
#endif
}
/* ----------------------------------------------------------------------------
To initialize a fresh random context we rely on the OS:
- Windows          : BCryptGenRandom
- macOS, BSD, wasi : arc4random_buf
- Linux            : getrandom, /dev/urandom
If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
-----------------------------------------------------------------------------*/
#if defined(_WIN32)
#pragma comment (lib,"bcrypt.lib")
#include <bcrypt.h>
static bool os_random_buf(void* buf, size_t buf_len) {
  return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
/*
#define SystemFunction036 NTAPI SystemFunction036
#include <NTSecAPI.h>
#undef SystemFunction036
static bool os_random_buf(void* buf, size_t buf_len) {
  RtlGenRandom(buf, (ULONG)buf_len);
  return true;
}
*/
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
      defined(__wasi__)
#include <stdlib.h>
static bool os_random_buf(void* buf, size_t buf_len) {
  arc4random_buf(buf, buf_len);
  return true;
}
#elif defined(__linux__)
#include <sys/syscall.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
static bool os_random_buf(void* buf, size_t buf_len) {
  // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
  // and for the latter the actual `getrandom` call is not always defined.
  // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
  // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
#ifdef SYS_getrandom
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK (1)
#endif
  static volatile _Atomic(uintptr_t) no_getrandom; // = 0
  if (mi_atomic_read(&no_getrandom)==0) {
    ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
    if (ret >= 0) return (buf_len == (size_t)ret);
    if (ret != ENOSYS) return false;
    mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom
  }
#endif
  int flags = O_RDONLY;
  #if defined(O_CLOEXEC)
  flags |= O_CLOEXEC;
  #endif
  int fd = open("/dev/urandom", flags, 0);
  if (fd < 0) return false;
  size_t count = 0;
  while(count < buf_len) {
    ssize_t ret = read(fd, (char*)buf + count, buf_len - count);
    if (ret<=0) {
      if (errno!=EAGAIN && errno!=EINTR) break;
    }
    else {
      count += ret;
    }
  }
  close(fd);
  return (count==buf_len);
}
#else
static bool os_random_buf(void* buf, size_t buf_len) {
  return false;
}
#endif
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif

static uintptr_t os_random_weak(uintptr_t extra_seed) {
  uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
  LARGE_INTEGER pcount;
  QueryPerformanceCounter(&pcount);
  x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
  x ^= (uintptr_t)mach_absolute_time();
#else
  struct timespec time;
  clock_gettime(CLOCK_MONOTONIC, &time);
  x ^= (uintptr_t)time.tv_sec;
  x ^= (uintptr_t)time.tv_nsec;
#endif
  // and do a few randomization steps
  uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
  for (uintptr_t i = 0; i < max; i++) {
    x = _mi_random_shuffle(x);
  }
  mi_assert_internal(x != 0);
  return x;
}
void _mi_random_init(mi_random_ctx_t* ctx) {
  uint8_t key[32];
  if (!os_random_buf(key, sizeof(key))) {
    // if we fail to get random data from the OS, we fall back to a
    // weak random source based on the current time
    _mi_warning_message("unable to use secure randomness\n");
    uintptr_t x = os_random_weak(0);
    for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
      x = _mi_random_shuffle(x);
      ((uint32_t*)key)[i] = (uint32_t)x;
    }
  }
  chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
}
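Taken together, the intended usage of this interface by the rest of the allocator looks roughly like this (a sketch; the cookie/key names mirror the fields initialized elsewhere in this change set):

#include "mimalloc-internal.h"  // assumed to declare mi_random_ctx_t and _mi_random_* as above

static void example_random_usage(void) {
  mi_random_ctx_t ctx;
  _mi_random_init(&ctx);                         // seed from the OS, then chacha_init
  uintptr_t cookie = _mi_random_next(&ctx) | 1;  // e.g. a non-zero heap cookie
  uintptr_t key0 = _mi_random_next(&ctx);        // e.g. free-list encoding keys
  uintptr_t key1 = _mi_random_next(&ctx);
  mi_random_ctx_t thread_ctx;
  _mi_random_split(&ctx, &thread_ctx);           // independent stream with a fresh nonce
  (void)cookie; (void)key0; (void)key1; (void)thread_ctx;
}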
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
  for (size_t i = 0; i < n; i++) {
    if (x[i] != y[i]) return false;
  }
  return true;
}
static void chacha_test(void)
{
  uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
  uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
  qround(x, 0, 1, 2, 3);
  mi_assert_internal(array_equals(x, x_out, 4));

  uint32_t y[16] = {
    0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
    0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
    0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
    0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
  uint32_t y_out[16] = {
    0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
    0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
    0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
    0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
  qround(y, 2, 7, 8, 13);
  mi_assert_internal(array_equals(y, y_out, 16));

  mi_random_ctx_t r = {
    { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
      0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
      0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
      0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
    {0},
    0
  };
  uint32_t r_out[16] = {
    0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
    0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
    0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
    0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
  chacha_block(&r);
  mi_assert_internal(array_equals(r.output, r_out, 16));
}
*/
src/segment.c
@ -310,13 +310,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_slices, mi_segments_tld
  return segment;
}

static bool mi_segment_cache_full(mi_segments_tld_t* tld)
static bool mi_segment_cache_full(mi_segments_tld_t* tld)
{
  // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread
  size_t max_cache = mi_option_get(mi_option_segment_cache);
  if (tld->cache_count < max_cache
    && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache
  ) {
  ) {
    return false;
  }
  // take the opportunity to reduce the segment cache if it is too large (now)
@ -660,6 +660,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
  size_t memid = 0;
  // segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld);
  segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld);

  if (segment == NULL) return NULL; // failed to allocate
  mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
  if (!commit) {
@ -669,6 +670,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
  }
  segment->memid = memid;
  segment->mem_is_fixed = mem_large;

  segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit;
  mi_segments_track_size((long)(segment_size), tld);
  mi_segment_map_allocated_at(segment);
@ -813,7 +815,6 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
    _mi_os_reset(start, psize, tld->stats);
  }

  // zero the page data, but not the segment fields
  page->is_zero_init = false;
  ptrdiff_t ofs = offsetof(mi_page_t, capacity);
@ -854,7 +855,28 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
// are "abandoned" and will be reclaimed by other threads to
// reuse their pages and/or free them eventually
static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments

// prepend a list of abandoned segments atomically to the global abandoned list; O(n)
static void mi_segments_prepend_abandoned(mi_segment_t* first) {
  if (first == NULL) return;

  // first try if the abandoned list happens to be NULL
  if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return;

  // if not, find the end of the list
  mi_segment_t* last = first;
  while (last->abandoned_next != NULL) {
    last = last->abandoned_next;
  }

  // and atomically prepend
  mi_segment_t* next;
  do {
    next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned));
    last->abandoned_next = next;
  } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next));
}

static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
  mi_assert_internal(segment->used == segment->abandoned);
@ -878,16 +900,13 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
  // force delayed decommits instead?
  mi_segment_delayed_decommit(segment, false, tld->stats);

  // add it to the abandoned list
  // all pages in the segment are abandoned; add it to the abandoned list
  _mi_stat_increase(&tld->stats->segments_abandoned, 1);
  mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
  segment->thread_id = 0;
  mi_segment_t* next;
  do {
    next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned));
    mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next);
  } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next));
  mi_atomic_increment(&abandoned_count);
  segment->abandoned_next = NULL;
  mi_segments_prepend_abandoned(segment); // prepend one-element list
  mi_atomic_increment(&abandoned_count); // keep approximate count
}
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -904,24 +923,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
}

bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
  uintptr_t reclaimed = 0;
  uintptr_t atmost;
  if (try_all) {
    atmost = abandoned_count+16; // close enough
  }
  else {
    atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated)
    if (atmost < 2) atmost = 2; // but at least 2
  // To avoid the A-B-A problem, grab the entire list atomically
  mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations
  if (segment == NULL) return false;
  segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL);
  if (segment == NULL) return false;

  // we got a non-empty list
  if (!try_all) {
    // take at most 1/8th of the list and append the rest back to the abandoned list again
    // this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem)
    // and probably ok since the length will tend to be not too large.
    uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated)
    if (atmost < 8) atmost = 8; // but at least 8

    // find the split point
    mi_segment_t* last = segment;
    while (last->abandoned_next != NULL && atmost > 0) {
      last = last->abandoned_next;
      atmost--;
    }
    // split the list and push back the remaining segments
    mi_segment_t* next = last->abandoned_next;
    last->abandoned_next = NULL;
    mi_segments_prepend_abandoned(next);
  }

  // for `atmost` `reclaimed` abandoned segments...
  while(atmost > reclaimed) {
    // try to claim the head of the abandoned segments
    mi_segment_t* segment;
    do {
      segment = (mi_segment_t*)abandoned;
    } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment));
    if (segment==NULL) break; // stop early if no more segments available
  // reclaim all segments that we kept
  while(segment != NULL) {
    mi_segment_t* const next = segment->abandoned_next; // save the next segment

    // got it.
    mi_atomic_decrement(&abandoned_count);
@ -962,7 +992,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
      slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
    }
    else {
      // otherwise reclaim it
      // otherwise reclaim it
      _mi_page_reclaim(heap,page);
    }
  }
@ -974,11 +1004,12 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
  if (segment->used == 0) { // due to page_clear
    mi_segment_free(segment,false,tld);
  }
  else {
    reclaimed++;
  }

  // go on
  segment = next;
}
return (reclaimed>0);

return true;
}
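The reshaped reclaim above sidesteps the lock-free A-B-A hazard: rather than popping one head at a time (where a segment could be freed and re-pushed between the read and the CAS), it exchanges the entire list for NULL and walks it privately, pushing any surplus back. A minimal C11 sketch of that pattern (hypothetical `node_t`, not the mimalloc source):

#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s { struct node_s* next; } node_t;
static _Atomic(node_t*) list;  // global abandoned list (head)

// grab the entire list atomically; no other thread can observe a stale head
static node_t* take_all(void) {
  return atomic_exchange(&list, NULL);
}

// prepend a (privately owned) chain back; O(n) to find its tail
static void prepend_all(node_t* first) {
  if (first == NULL) return;
  node_t* last = first;
  while (last->next != NULL) last = last->next;
  node_t* head = atomic_load(&list);
  do {
    last->next = head;  // on CAS failure, `head` is refreshed and we relink
  } while (!atomic_compare_exchange_weak(&list, &head, first));
}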
src/static.c
@ -14,8 +14,14 @@ terms of the MIT license. A copy of the license can be found in the file
// it will override all the standard library allocation
// functions (on Unix)
#include "stats.c"
#include "random.c"
#include "os.c"
<<<<<<< HEAD
//#include "memory.c"
=======
#include "arena.c"
#include "memory.c"
>>>>>>> dev
#include "segment.c"
#include "page.c"
#include "heap.c"
test/test-stress.c
@ -26,8 +26,8 @@ terms of the MIT license.
//
// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
static int ITER = 10; // N full iterations destructing and re-creating all threads
static int SCALE = 10; // scaling factor
static int ITER = 50; // N full iterations destructing and re-creating all threads

// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor
@ -135,9 +135,9 @@ static void stress(intptr_t tid) {
    allocs--;
    if (data_top >= data_size) {
      data_size += 100000;
      data = (void**)custom_realloc(data, data_size * sizeof(void*));
      data = (void**)custom_realloc(data, data_size * sizeof(void*));
    }
    data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
    data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
  }
  else {
    // 25% retain
@ -209,7 +209,7 @@ int main(int argc, char** argv) {
  }
  mi_collect(false);
#ifndef NDEBUG
  if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); }
  if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}