merge from dev

daan 2020-01-13 18:01:34 -08:00
commit 4a27ea1643
24 changed files with 773 additions and 304 deletions

View File

@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake")
set(mi_sources
src/stats.c
src/random.c
src/os.c
src/arena.c
src/segment.c
@ -114,7 +115,7 @@ endif()
# extra needed libraries
if(WIN32)
list(APPEND mi_libraries psapi shell32 user32)
list(APPEND mi_libraries psapi shell32 user32 bcrypt)
else()
list(APPEND mi_libraries pthread)
find_library(LIBRT rt)

View File

@ -13,16 +13,31 @@ jobs:
pool:
vmImage:
windows-2019
strategy:
matrix:
Debug:
BuildType: debug
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release:
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Secure:
BuildType: secure
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
steps:
- task: CMake@1
inputs:
workingDirectory: 'build'
cmakeArgs: ..
workingDirectory: $(BuildType)
cmakeArgs: .. $(cmakeExtraArgs)
- task: MSBuild@1
inputs:
solution: build/libmimalloc.sln
- upload: $(Build.SourcesDirectory)/build
artifact: windows
solution: $(BuildType)/libmimalloc.sln
- script: |
cd $(BuildType)
ctest
displayName: CTest
- upload: $(Build.SourcesDirectory)/$(BuildType)
artifact: mimalloc-windows-$(BuildType)
- job:
displayName: Linux
@ -61,32 +76,42 @@ jobs:
CXX: clang++
BuildType: secure-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
steps:
- task: CMake@1
inputs:
workingDirectory: $(BuildType)
cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(nproc) -C $(BuildType)
displayName: Make
- script: make test -C $(BuildType)
displayName: Ctest
displayName: CTest
- upload: $(Build.SourcesDirectory)/$(BuildType)
artifact: ubuntu-$(BuildType)
artifact: mimalloc-ubuntu-$(BuildType)
- job:
displayName: macOS
pool:
vmImage:
macOS-10.14
strategy:
matrix:
Debug:
BuildType: debug
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release:
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Secure:
BuildType: secure
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
steps:
- task: CMake@1
inputs:
workingDirectory: 'build'
cmakeArgs: ..
- script: make -j$(sysctl -n hw.ncpu) -C build
- upload: $(Build.SourcesDirectory)/build
artifact: macos
workingDirectory: $(BuildType)
cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
displayName: Make
- script: make test -C $(BuildType)
displayName: CTest
- upload: $(Build.SourcesDirectory)/$(BuildType)
artifact: mimalloc-macos-$(BuildType)

View File

@ -129,7 +129,7 @@
<CompileAs>Default</CompileAs>
</ClCompile>
<Link>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<IgnoreSpecificDefaultLibraries>
</IgnoreSpecificDefaultLibraries>
<ModuleDefinitionFile>
@ -195,7 +195,7 @@
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<ModuleDefinitionFile>
</ModuleDefinitionFile>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
@ -243,6 +243,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

View File

@ -70,5 +70,8 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -228,6 +228,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

View File

@ -56,6 +56,9 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -246,6 +246,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

View File

@ -43,6 +43,9 @@
<ClCompile Include="..\..\src\bitmap.inc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -231,6 +231,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

View File

@ -46,6 +46,9 @@
<ClCompile Include="..\..\src\bitmap.inc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc-types.h"
#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__))
#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
#define MI_TLS_RECURSE_GUARD
#endif
@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...);
void _mi_options_init(void);
void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn;
// "init.c"
// random.c
void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c
extern mi_stats_t _mi_stats_main;
extern const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
uintptr_t _mi_random_shuffle(uintptr_t x);
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
bool _mi_preloading(); // true while the C runtime is not ready
// os.c
@ -86,8 +91,9 @@ void _mi_page_unfull(mi_page_t* page);
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
void _mi_heap_delayed_free(mi_heap_t* heap);
void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay);
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void _mi_deferred_free(mi_heap_t* heap, bool force);
@ -101,7 +107,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i
// "heap.c"
void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
uintptr_t _mi_heap_random(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
// "stats.c"
@ -236,7 +241,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate
static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef MI_TLS_RECURSE_GUARD
// on some platforms, like macOS, the dynamic loader calls `malloc`
// on some BSD platforms, like macOS, the dynamic loader calls `malloc`
// to initialize thread local data. To avoid recursion, we need to avoid
// accessing the thread local `_mi_default_heap` until our module is loaded
// and use the statically allocated main heap until that time.
@ -406,12 +411,30 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
}
// -------------------------------------------------------------------
// Encoding/Decoding the free list next pointers
// Note: we pass a `null` value to be used as the `NULL` value for the
// end of a free list. This is to prevent the cookie itself from ever
// being present among user blocks (as `cookie^0==cookie`).
// -------------------------------------------------------------------
/* -------------------------------------------------------------------
Encoding/Decoding the free list next pointers
This is to protect against buffer overflow exploits where the
free list is mutated. Many hardened allocators xor the next pointer `p`
with a secret key `k1`, as `p^k1`. This prevents overwriting with known
values but might still be too weak: if the attacker can guess
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
Moreover, if multiple blocks can be read as well, the attacker can
xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
about the pointers (and subsequently `k1`).
Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
Since these operations are not associative, the above approaches do not
work so well any more even if the `p` can be guesstimated. For example,
for the read case we can subtract two entries to discard the `+k1` term,
but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
We include the left-rotation since xor and addition are otherwise linear
in the lowest bit. Finally, both keys are unique per page which reduces
the re-use of keys by a large factor.
We also pass a separate `null` value to be used as `NULL` or otherwise
`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
------------------------------------------------------------------- */
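A minimal sketch of the encode/decode round trip described above; the names `encode` and `decode` are hypothetical and only assume the `mi_rotl`/`mi_rotr` helpers defined a bit further below:

static inline uintptr_t encode(uintptr_t p, uintptr_t k1, uintptr_t k2) {
  return mi_rotl(p ^ k2, k1) + k1;    // ((p^k2) <<< k1) + k1
}
static inline uintptr_t decode(uintptr_t e, uintptr_t k1, uintptr_t k2) {
  return mi_rotr(e - k1, k1) ^ k2;    // subtract k1, rotate back, then undo the xor with k2
}
// decode(encode(p, k1, k2), k1, k2) == p for every p and every pair of keys.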
static inline bool mi_is_in_same_segment(const void* p, const void* q) {
return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
@ -423,52 +446,84 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) {
return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
}
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) {
static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
shift %= MI_INTPTR_BITS;
return ((x << shift) | (x >> (MI_INTPTR_BITS - shift)));
}
static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
shift %= MI_INTPTR_BITS;
return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
}
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) {
#ifdef MI_ENCODE_FREELIST
mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2);
if (mi_unlikely((void*)b==null)) { b = NULL; }
return b;
#else
UNUSED(cookie); UNUSED(null);
UNUSED(key1); UNUSED(key2); UNUSED(null);
return (mi_block_t*)block->next;
#endif
}
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) {
#ifdef MI_ENCODE_FREELIST
if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
block->next = (mi_encoded_t)next ^ cookie;
block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1;
#else
UNUSED(cookie); UNUSED(null);
UNUSED(key1); UNUSED(key2); UNUSED(null);
block->next = (mi_encoded_t)next;
#endif
}
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
#ifdef MI_ENCODE_FREELIST
mi_block_t* next = mi_block_nextx(page,block,page->cookie);
// check for free list corruption: is `next` at least in our segment range?
mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]);
// check for free list corruption: is `next` at least in the same page?
// TODO: check if `next` is `page->block_size` aligned?
if (next!=NULL && !mi_is_in_same_page(block, next)) {
if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
_mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
next = NULL;
}
return next;
#else
UNUSED(page);
return mi_block_nextx(page,block,0);
return mi_block_nextx(page,block,0,0);
#endif
}
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
#ifdef MI_ENCODE_FREELIST
mi_block_set_nextx(page,block,next, page->cookie);
mi_block_set_nextx(page,block,next, page->key[0], page->key[1]);
#else
UNUSED(page);
mi_block_set_nextx(page,block, next,0);
mi_block_set_nextx(page,block, next,0,0);
#endif
}
// -------------------------------------------------------------------
// Fast "random" shuffle
// -------------------------------------------------------------------
static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros
#if (MI_INTPTR_SIZE==8)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
return x;
}
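For intuition, a short sketch (illustrative only; `stream_next` is a hypothetical name) of how this shuffle can be iterated to produce a cheap stream of mixed words from a single seed, as `mi_page_free_list_extend_secure` does later in this diff:

static inline uintptr_t stream_next(uintptr_t* state) {
  *state = _mi_random_shuffle(*state);   // one mixing step per output word
  return *state;
}
// uintptr_t s  = _mi_heap_random_next(heap) | 1;  // non-zero seed from the secure RNG
// uintptr_t r1 = stream_next(&s);
// uintptr_t r2 = stream_next(&s);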
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)

View File

@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Encoded free lists allow detection of corrupted free lists
// and can detect buffer overflows and double `free`s.
#if (MI_SECURE>=3 || MI_DEBUG>=1)
#if (MI_SECURE>=3 || MI_DEBUG>=1)
#define MI_ENCODE_FREELIST 1
#endif
@ -76,7 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (8*MI_INTPTR_SIZE)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define KiB ((size_t)1024)
#define MiB (KiB*KiB)
@ -112,6 +112,8 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32mb on 64-bit
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c)
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
@ -145,14 +147,14 @@ typedef enum mi_delayed_e {
} mi_delayed_t;
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} x;
} mi_page_flags_t;
// Thread free list.
@ -189,11 +191,12 @@ typedef struct mi_page_s {
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
bool is_zero; // `true` if the blocks in the free list are zero initialized
uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#ifdef MI_ENCODE_FREELIST
uintptr_t cookie; // random cookie to encode the free lists
uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`)
#endif
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
@ -208,9 +211,9 @@ typedef struct mi_page_s {
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// improve page index calculation
// without padding: 11 words on 64-bit, 13 on 32-bit.
#ifndef MI_ENCODE_FREELIST
void* padding[1]; // 12 words on 64-bit, 14 words on 32-bit
// without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words
#if (MI_INTPTR_SIZE==4)
void* padding[1]; // 12/14 words on 32-bit plain
#endif
} mi_page_t;
@ -253,8 +256,8 @@ typedef struct mi_segment_s {
uintptr_t commit_mask;
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache
volatile _Atomic(struct mi_segment_s*) abandoned_next;
struct mi_segment_s* next; // the list of freed segments in the cache
struct mi_segment_s* abandoned_next;
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t used; // count of pages in use
@ -296,6 +299,14 @@ typedef struct mi_page_queue_s {
#define MI_BIN_FULL (MI_BIN_HUGE+1)
// Random context
typedef struct mi_random_cxt_s {
uint32_t input[16];
uint32_t output[16];
int output_available;
} mi_random_ctx_t;
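A sketch of how this context and the `random.c` declarations above fit together, mirroring the heap initialization shown later in this diff (variable names are illustrative):

mi_random_ctx_t ctx;
_mi_random_init(&ctx);                         // seed the context from the OS random source
uintptr_t cookie = _mi_random_next(&ctx) | 1;  // e.g. a non-zero cookie or key
mi_random_ctx_t child;
_mi_random_split(&ctx, &child);                // derive an independent stream for a new heap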
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
@ -303,8 +314,9 @@ struct mi_heap_s {
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
volatile _Atomic(mi_block_t*) thread_delayed_free;
uintptr_t thread_id; // thread this heap belongs to
uintptr_t cookie;
uintptr_t random; // random number used for secure allocation
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t key[2]; // two random keys used to encode the `thread_delayed_free` list
mi_random_ctx_t random; // random number context used for secure allocation
size_t page_count; // total number of pages in the `pages` queues.
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
};

View File

@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept {
// ------------------------------------------------------
// Check for double free in secure and debug mode
// Check for double free in secure and debug mode
// This is somewhat expensive so only enabled for secure mode 4
// ------------------------------------------------------
@ -139,32 +139,29 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons
return false;
}
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) {
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) {
// Suspicious: the decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block))
{
_mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
return true;
}
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
// The decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free));
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, mi_tf_block(tf), block))
{
_mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
return true;
}
return false;
}
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL?
{
// Suspicious: decoded value in block is in the same segment (or NULL) -- maybe a double free?
mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
{
// Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
// (continue in separate function to improve code generation)
return mi_check_is_double_freex(page, block, n);
}
return mi_check_is_double_freex(page, block);
}
return false;
}
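In effect, a straightforward double free by the owning thread is likely to be caught here; a hypothetical test snippet (not part of this change) that would trigger the fatal message above in secure mode 4 or debug builds:

void* p = mi_malloc(32);
mi_free(p);
mi_free(p);   // second free: the decoded next field points into the same page,
              // the block is found on `page->local_free`, and the error is reported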
#else
@ -237,7 +234,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap,block,dfree, heap->cookie);
mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}
@ -261,7 +258,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
// and push it on the free list
if (mi_likely(local)) {
// owning thread can free a block directly
if (mi_check_is_double_free(page, block)) return;
if (mi_unlikely(mi_check_is_double_free(page, block))) return;
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
@ -336,7 +333,7 @@ void mi_free(void* p) mi_attr_noexcept
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
if (mi_check_is_double_free(page,block)) return;
if (mi_unlikely(mi_check_is_double_free(page,block))) return;
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;

View File

@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file
/* ----------------------------------------------------------------------------
"Arenas" are fixed area's of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.
Currently arenas are only used for huge OS page (1GiB) reservations,
otherwise it delegates to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas
In the future, we can expose an API to manually add more kinds of arenas
which is sometimes needed for embedded devices or shared memory for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).
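For context, a hedged usage sketch of the reservation API that backs these arenas (the page count, NUMA node, and timeout below are illustrative values):

#include <mimalloc.h>

int main(void) {
  // reserve 4 x 1GiB huge OS pages on NUMA node 0, waiting at most 10 seconds
  if (mi_reserve_huge_os_pages_at(4, 0, 10000) == 0) {
    void* p = mi_malloc(64 * 1024 * 1024);  // large allocations may now be served from the arena
    mi_free(p);
  }
  return 0;
}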
@ -41,7 +41,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
/* -----------------------------------------------------------
@ -87,13 +87,13 @@ static _Atomic(uintptr_t) mi_arena_count; // = 0
// Use `0` as a special id for direct OS allocated memory.
#define MI_MEMID_OS 0
static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
mi_assert_internal(arena_index < 0xFE);
mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}
static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
mi_assert_internal(memid != MI_MEMID_OS);
*arena_index = (memid & 0xFF) - 1;
*bitmap_index = (memid >> 8);
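As a worked illustration of this packing: for `arena_index == 2` and `bitmap_index == 5` the memid is `(5 << 8) | ((2+1) & 0xFF) == 0x503`; decoding gives back `(0x503 & 0xFF) - 1 == 2` and `0x503 >> 8 == 5`, while memid `0` stays reserved for direct OS allocations.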
@ -106,7 +106,7 @@ static size_t mi_block_count_of_size(size_t size) {
/* -----------------------------------------------------------
Thread safe allocation in an arena
----------------------------------------------------------- */
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
const size_t fcount = arena->field_count;
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
@ -261,15 +261,15 @@ static bool mi_cache_push(void* start, size_t size, size_t memid, bool is_commit
Arena Allocation
----------------------------------------------------------- */
static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_bitmap_index_t bitmap_index;
if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
*memid = mi_memid_create(arena_index, bitmap_index);
*memid = mi_arena_id_create(arena_index, bitmap_index);
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*large = arena->is_large;
if (arena->is_committed) {
@ -293,23 +293,23 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n
return p;
}
void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
bool* commit, bool* large, bool* is_zero,
size_t* memid, mi_os_tld_t* tld)
void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
bool* commit, bool* large, bool* is_zero,
size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = MI_MEMID_OS;
*is_zero = false;
bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL`
const int numa_node = _mi_os_numa_node(tld); // current numa node
// try to allocate in an arena if the alignment is small enough
// and the object is not too large or too small.
if (alignment <= MI_SEGMENT_ALIGN &&
size <= MI_ARENA_MAX_OBJ_SIZE &&
if (alignment <= MI_SEGMENT_ALIGN &&
size <= MI_ARENA_MAX_OBJ_SIZE &&
size >= MI_ARENA_MIN_OBJ_SIZE)
{
const size_t bcount = mi_block_count_of_size(size);
@ -321,7 +321,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
if (arena==NULL) break; // end reached
if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
@ -376,7 +376,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, bool
// allocated in an arena
size_t arena_idx;
size_t bitmap_idx;
mi_memid_indices(memid, &arena_idx, &bitmap_idx);
mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
mi_assert_internal(arena != NULL);
@ -406,7 +406,7 @@ static bool mi_arena_add(mi_arena_t* arena) {
mi_assert_internal(arena != NULL);
mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
mi_assert_internal(arena->block_count > 0);
uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
if (i >= MI_MAX_ARENAS) {
mi_atomic_subu(&mi_arena_count, 1);
@ -434,11 +434,11 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
_mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
return ENOMEM;
}
_mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);
_mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages);
size_t bcount = mi_block_count_of_size(hsize);
size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
if (arena == NULL) {
_mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
@ -446,23 +446,24 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
}
arena->block_count = bcount;
arena->field_count = fields;
arena->start = (uint8_t*)p;
arena->start = (uint8_t*)p;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = true;
arena->is_zero_init = true;
arena->is_committed = true;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[bcount];
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = NULL;
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
mi_assert_internal(post >= 0);
if (post > 0) {
// don't use leftover bits at the end
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
}
mi_arena_add(arena);
return 0;
}
@ -477,8 +478,8 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (numa_count <= 0) numa_count = 1;
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs / numa_count) + 50;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
@ -500,7 +501,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
UNUSED(max_secs);
_mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
if (pages_reserved != NULL) *pages_reserved = 0;
int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
return err;
}

View File

@ -45,8 +45,8 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
}
#if MI_DEBUG>=3
static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
#if MI_DEBUG>=2
static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1);
UNUSED(arg2);
UNUSED(pq);
@ -59,7 +59,7 @@ static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page
static bool mi_heap_is_valid(mi_heap_t* heap) {
mi_assert_internal(heap!=NULL);
mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL);
mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);
return true;
}
#endif
@ -84,6 +84,7 @@ typedef enum mi_collect_e {
static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
UNUSED(arg2);
UNUSED(heap);
mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
mi_collect_t collect = *((mi_collect_t*)arg_collect);
_mi_page_free_collect(page, collect >= ABANDON);
if (mi_page_all_free(page)) {
@ -102,7 +103,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
UNUSED(arg2);
UNUSED(heap);
UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break
}
@ -184,12 +185,6 @@ mi_heap_t* mi_heap_get_backing(void) {
return bheap;
}
uintptr_t _mi_heap_random(mi_heap_t* heap) {
uintptr_t r = heap->random;
heap->random = _mi_random_shuffle(r);
return r;
}
mi_heap_t* mi_heap_new(void) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);
@ -197,12 +192,18 @@ mi_heap_t* mi_heap_new(void) {
memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = bheap->tld;
heap->thread_id = _mi_thread_id();
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1;
heap->random = _mi_heap_random(bheap);
_mi_random_split(&bheap->random, &heap->random);
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
return heap;
}
uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
return _mi_random_next(&heap->random);
}
// zero out the page queues
static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_initialized(heap));
@ -241,7 +242,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats
if (page->block_size > MI_MEDIUM_OBJ_SIZE_MAX) {

View File

@ -12,17 +12,21 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0, false, false, false, false, 0, 0,
{ 0 }, false,
0, false, false, false, false,
0, // capacity
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // free
#if MI_ENCODE_FREELIST
0,
{ 0, 0 },
#endif
0, // used
NULL,
ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0),
0, NULL, NULL, NULL
#ifndef MI_ENCODE_FREELIST
#if (MI_INTPTR_SIZE==4)
, { NULL } // padding
#endif
};
@ -95,10 +99,11 @@ const mi_heap_t _mi_heap_empty = {
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL),
0,
0,
0,
0,
0, // tid
0, // cookie
{ 0, 0 }, // keys
{ {0}, {0}, 0 },
0, // page count
false
};
@ -130,86 +135,29 @@ static mi_tld_t tld_main = {
{ MI_STATS_NULL } // stats
};
#if MI_INTPTR_SIZE==8
#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL)
#else
#define MI_INIT_COOKIE (0xCDCDCDCDUL)
#endif
mi_heap_t _mi_heap_main = {
&tld_main,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
NULL,
0, // thread id
#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!)
0xCDCDCDCDCDCDCDCDUL,
#else
0xCDCDCDCDUL,
#endif
0, // random
0, // page count
false // can reclaim
0, // thread id
MI_INIT_COOKIE, // initial cookie
{ MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0}, {0}, 0 }, // random
0, // page count
false // can reclaim
};
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { MI_STATS_NULL };
/* -----------------------------------------------------------
Initialization of random numbers
----------------------------------------------------------- */
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif
uintptr_t _mi_random_shuffle(uintptr_t x) {
#if (MI_INTPTR_SIZE==8)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
return x;
}
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse
uintptr_t x;
arc4random_buf(&x, sizeof x);
#else
// Hopefully, ASLR makes our function address random
uintptr_t x = (uintptr_t)((void*)&_mi_random_init);
x ^= seed;
// xor with high res time
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
x ^= (uintptr_t)mach_absolute_time();
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
#endif
return x;
}
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
@ -217,7 +165,7 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
mi_tld_t tld;
} mi_thread_data_t;
// Initialize the thread local default heap, called from `mi_thread_init`
@ -240,8 +188,10 @@ static bool _mi_heap_init(void) {
memcpy(tld, &tld_empty, sizeof(*tld));
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id();
heap->random = _mi_random_init(heap->thread_id);
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1;
_mi_random_init(&heap->random);
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->tld = tld;
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
@ -476,16 +426,17 @@ void mi_process_init(void) mi_attr_noexcept {
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_heap_t* h = mi_get_default_heap();
mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h;
#ifndef __APPLE__
_mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random;
_mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
#endif
_mi_heap_main.random = _mi_random_shuffle(random);
mi_process_setup_auto_thread_done();
_mi_os_init();
#if (MI_DEBUG)

View File

@ -79,7 +79,7 @@ typedef union mi_region_info_u {
struct {
bool valid;
bool is_large;
int numa_node;
short numa_node;
};
} mi_region_info_t;
@ -308,7 +308,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo
if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
// some blocks are still reset
mi_assert_internal(!info.is_large);
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit);
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
bool reset_zero = false;

View File

@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept {
// --------------------------------------------------------
// Options
// These can be accessed by multiple threads and may be
// These can be accessed by multiple threads and may be
// concurrently initialized, but an initializing data race
// is ok since they resolve to the same value.
// --------------------------------------------------------
@ -61,7 +61,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else
{ 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED
#endif
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
@ -91,7 +91,7 @@ void _mi_options_init(void) {
mi_option_desc_t* desc = &options[option];
_mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
}
}
}
mi_max_error_count = mi_option_get(mi_option_max_errors);
}
@ -100,7 +100,7 @@ long mi_option_get(mi_option_t option) {
mi_option_desc_t* desc = &options[option];
mi_assert(desc->option == option); // index should match the option
if (mi_unlikely(desc->init == UNINIT)) {
mi_option_init(desc);
mi_option_init(desc);
}
return desc->value;
}
@ -144,7 +144,7 @@ void mi_option_disable(mi_option_t option) {
static void mi_out_stderr(const char* msg) {
#ifdef _WIN32
// on windows with redirection, the C runtime cannot handle locale dependent output
// on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output.
if (!_mi_preloading()) { _cputs(msg); }
#else
@ -186,7 +186,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) {
out_buf[count] = 0;
out(out_buf);
if (!no_more_buf) {
out_buf[count] = '\n'; // if continue with the buffer, insert a newline
out_buf[count] = '\n'; // if continue with the buffer, insert a newline
}
}
@ -342,7 +342,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
#include <windows.h>
static bool mi_getenv(const char* name, char* result, size_t result_size) {
result[0] = 0;
size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
return (len > 0 && len < result_size);
}
#else
@ -368,7 +368,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
}
}
#endif
static void mi_option_init(mi_option_desc_t* desc) {
static void mi_option_init(mi_option_desc_t* desc) {
#ifndef _WIN32
// cannot call getenv() when still initializing the C runtime.
if (_mi_preloading()) return;
#endif
// Read option value from the environment
char buf[64+1];
mi_strlcpy(buf, "mimalloc_", sizeof(buf));

View File

@ -165,9 +165,7 @@ void _mi_os_init() {
os_page_size = (size_t)result;
os_alloc_granularity = os_page_size;
}
if (mi_option_is_enabled(mi_option_large_os_pages)) {
large_os_page_size = 2*MiB;
}
large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
}
#endif
@ -332,7 +330,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
}
else {
int lflags = flags;
int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux
int lfd = fd;
#ifdef MAP_ALIGNED_SUPER
lflags |= MAP_ALIGNED_SUPER;
@ -408,8 +406,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages)
intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint);
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif
mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size);
hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
@ -597,6 +595,18 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
return mi_os_page_align_areax(true, addr, size, newsize);
}
static void mi_mprotect_hint(int err) {
#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page
if (err == ENOMEM) {
_mi_warning_message("the previous warning may have been caused by a low memory map limit.\n"
" On Linux this is controlled by the vm.max_map_count. For example:\n"
" > sudo sysctl -w vm.max_map_count=262144\n");
}
#else
UNUSED(err);
#endif
}
// Commit/Decommit memory.
// Usually commit is aligned liberally, while decommit is aligned conservatively.
// (but not for the reset version where we want commit to be conservative as well)
@ -645,6 +655,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
#endif
if (err != 0) {
_mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
mi_mprotect_hint(err);
}
mi_assert_internal(err == 0);
return (err == 0);
@ -763,6 +774,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
#endif
if (err != 0) {
_mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err);
mi_mprotect_hint(err);
}
return (err == 0);
}
@ -908,8 +920,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
// Initialize the start address after the 32TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages);
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB
#endif
}
end = start + size;

View File

@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(mi_page_is_valid_init(page));
#if MI_SECURE
mi_assert_internal(page->cookie != 0);
mi_assert_internal(page->key != 0);
#endif
if (page->heap!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
@ -119,26 +119,27 @@ bool _mi_page_is_valid(mi_page_t* page) {
}
#endif
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
mi_thread_free_t tfree;
mi_thread_free_t tfreex;
mi_delayed_t old_delay;
do {
tfreex = tfree = page->thread_free;
if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) {
tfreex = mi_tf_set_delayed(tfree,delay);
}
else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) {
tfree = mi_atomic_read_relaxed(&page->thread_free);
tfreex = mi_tf_set_delayed(tfree, delay);
old_delay = mi_tf_delayed(tfree);
if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
continue; // and try again
}
}
while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal
!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree));
else if (delay == old_delay) {
break; // avoid atomic operation if already equal
}
else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
break; // leave never set
}
} while ((old_delay == MI_DELAYED_FREEING) ||
!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree));
}
/* -----------------------------------------------------------
Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
@ -231,10 +232,13 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
mi_assert_internal(!page->is_reset);
mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE);
_mi_page_free_collect(page,false);
mi_page_queue_t* pq = mi_page_queue(heap, page->block_size);
mi_page_queue_push(heap, pq, page);
mi_assert_internal(page->heap != NULL);
_mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set)
mi_assert_expensive(_mi_page_is_valid(page));
}
@ -286,7 +290,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap,block, heap->cookie);
mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
@ -294,9 +298,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap, block, dfree, heap->cookie);
mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}
block = next;
}
@ -312,7 +315,7 @@ void _mi_page_unfull(mi_page_t* page) {
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_is_in_full(page));
_mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE);
_mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false);
if (!mi_page_is_in_full(page)) return;
mi_heap_t* heap = page->heap;
@ -328,7 +331,7 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(!mi_page_immediate_available(page));
mi_assert_internal(!mi_page_is_in_full(page));
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
@ -345,7 +348,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(page->heap != NULL);
#if MI_DEBUG > 1
mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
#endif
@ -359,7 +362,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
#if MI_DEBUG>1
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) {
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
@ -394,7 +397,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
_mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
}
}
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
@ -422,19 +425,39 @@ void _mi_page_retire(mi_page_t* page) {
// (or we end up retiring and re-allocating most of the time)
// NOTE: refine this more: we should not retire if this
// is the only page left with free blocks. It is not clear
// how to check this efficiently though...
// how to check this efficiently though...
// for now, we don't retire if it is the only page left of this size class.
mi_page_queue_t* pq = mi_page_queue_of(page);
if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) {
// if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
if (pq->last==page && pq->first==page) {
if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
return; // don't retire after all
page->retire_expire = 4;
mi_assert_internal(mi_page_all_free(page));
return; // don't free after all
}
}
_mi_page_free(page, pq, false);
}
// free retired pages: we don't need to look at the entire queues
// since we only retire pages that are the last one in a queue.
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) {
mi_page_t* page = pq->first;
if (page != NULL && page->retire_expire != 0) {
if (mi_page_all_free(page)) {
page->retire_expire--;
if (force || page->retire_expire == 0) {
_mi_page_free(pq->first, pq, force);
}
}
else {
page->retire_expire = 0;
}
}
}
}
/* -----------------------------------------------------------
Initialize the initial free list in a page.
@ -475,11 +498,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
// and initialize the free list by randomly threading through them
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
const uintptr_t r = _mi_heap_random_next(heap);
size_t current = r % slice_count;
counts[current]--;
mi_block_t* const free_start = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
// and iterate through the rest; use `random_shuffle` for performance
uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
const size_t round = i%MI_INTPTR_SIZE;
@ -500,7 +524,6 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
// prepend to the free list (usually NULL)
mi_block_set_next(page, blocks[current], page->free); // end of the list
page->free = free_start;
heap->random = _mi_random_shuffle(rnd);
}
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats)
@ -514,15 +537,15 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
const size_t bsize = page->block_size;
mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity);
// initialize a sequential free list
mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
while(block <= last) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
}
// prepend to free list (usually `NULL`)
mi_block_set_next(page, last, page->free);
page->free = start;
@ -607,7 +630,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
#ifdef MI_ENCODE_FREELIST
page->cookie = _mi_heap_random(heap) | 1;
page->key[0] = _mi_heap_random_next(heap);
page->key[1] = _mi_heap_random_next(heap);
#endif
page->is_zero = page->is_zero_init;
@ -618,9 +642,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->thread_freed == 0);
mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL);
mi_assert_internal(page->retire_expire == 0);
mi_assert_internal(!mi_page_has_aligned(page));
#if (MI_ENCODE_FREELIST)
mi_assert_internal(page->cookie != 0);
mi_assert_internal(page->key != 0);
#endif
mi_assert_expensive(mi_page_is_valid_init(page));
@ -698,8 +723,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
}
else {
mi_assert(pq->first == page);
page->retire_expire = 0;
}
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
// finally collect retired pages
_mi_heap_collect_retired(heap,false);
return page;
}
@ -709,7 +738,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap,size);
mi_page_t* page = pq->first;
if (page != NULL) {
if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) {
if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
// in secure mode, we extend half the time to increase randomness
mi_page_extend_free(heap, page, heap->tld);
mi_assert_internal(mi_page_immediate_available(page));
@ -718,6 +747,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
_mi_page_free_collect(page,false);
}
if (mi_page_immediate_available(page)) {
page->retire_expire = 0;
return page; // fast path
}
}
@ -757,7 +787,6 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept {
// Because huge pages contain just one block, and the segment contains
// just that page, we always treat them as abandoned and any thread
// that frees the block can free the whole page and segment directly.
static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
size_t block_size = _mi_wsize_from_size(size) * sizeof(uintptr_t);
mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
@ -786,7 +815,7 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
_mi_stat_increase(&heap->tld->stats.huge, block_size);
_mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
}
}
}
return page;
}

328
src/random.c Normal file
View File

@ -0,0 +1,328 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <string.h> // memset
/* ----------------------------------------------------------------------------
We use our own PRNG for predictable performance of random number generation,
and to avoid implementations that take a lock. The OS-provided random source
is used only to initialize the seed. Since we do not need ultimate performance,
but do rely on security (for the secret cookies in secure mode), we use the
cryptographically secure ChaCha20 generator.
-----------------------------------------------------------------------------*/
#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?
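A brief usage sketch of the interface this file implements (the _mi_random_* signatures appear further down in this file; the fields of mi_random_ctx_t are assumed to come from the internal headers): a context is seeded once from the OS and then split per thread, so drawing random numbers never needs a lock.

#include "mimalloc-internal.h"

void example_random_usage(void) {
  mi_random_ctx_t main_ctx;
  _mi_random_init(&main_ctx);                 // seed from the OS (or the weak fallback)

  mi_random_ctx_t thread_ctx;
  _mi_random_split(&main_ctx, &thread_ctx);   // independent per-thread stream (fresh nonce)

  uintptr_t cookie = _mi_random_next(&thread_ctx) | 1;  // e.g. a non-zero secret cookie
  (void)cookie;
}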
/* ----------------------------------------------------------------------------
ChaCha20 implementation of the original algorithm, with a 64-bit nonce
and a 64-bit counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position 0 to 3: the constant ("expand 32-byte k").
Position 4 to 11: the key.
Position 12 to 13: the counter.
Position 14 to 15: the nonce.
The implementation uses regular C code which compiles very well on modern compilers.
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
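As a reading aid (not part of the original source), the 16-word state that chacha_init below fills in can be pictured as:

//   input[ 0.. 3] = "expa" "nd 3" "2-by" "te k"   // the sigma constant
//   input[ 4..11] = key words 0..7                // the 256-bit key
//   input[12..13] = block counter (low, high)     // 64-bit counter, starts at 0
//   input[14..15] = nonce (low, high)             // 64-bit nonce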
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
return (x << shift) | (x >> (32 - shift));
}
static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
{
// scramble into `x`
uint32_t x[16];
for (size_t i = 0; i < 16; i++) {
x[i] = ctx->input[i];
}
for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
qround(x, 0, 4, 8, 12);
qround(x, 1, 5, 9, 13);
qround(x, 2, 6, 10, 14);
qround(x, 3, 7, 11, 15);
qround(x, 0, 5, 10, 15);
qround(x, 1, 6, 11, 12);
qround(x, 2, 7, 8, 13);
qround(x, 3, 4, 9, 14);
}
// add scrambled data to the initial state
for (size_t i = 0; i < 16; i++) {
ctx->output[i] = x[i] + ctx->input[i];
}
ctx->output_available = 16;
// increment the counter for the next round
ctx->input[12] += 1;
if (ctx->input[12] == 0) {
ctx->input[13] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
}
}
}
static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
if (ctx->output_available <= 0) {
chacha_block(ctx);
ctx->output_available = 16; // (assign again to suppress static analysis warning)
}
const uint32_t x = ctx->output[16 - ctx->output_available];
ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
ctx->output_available--;
return x;
}
static inline uint32_t read32(const uint8_t* p, size_t idx32) {
const size_t i = 4*idx32;
return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian, but we do anyway
// just to stay compatible :-)
memset(ctx, 0, sizeof(*ctx));
for (size_t i = 0; i < 4; i++) {
const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
ctx->input[i] = read32(sigma,i);
}
for (size_t i = 0; i < 8; i++) {
ctx->input[i + 4] = read32(key,i);
}
ctx->input[12] = 0;
ctx->input[13] = 0;
ctx->input[14] = (uint32_t)nonce;
ctx->input[15] = (uint32_t)(nonce >> 32);
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
ctx_new->input[14] = (uint32_t)nonce;
ctx_new->input[15] = (uint32_t)(nonce >> 32);
mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
chacha_block(ctx_new);
}
/* ----------------------------------------------------------------------------
Random interface
-----------------------------------------------------------------------------*/
#if MI_DEBUG>1
static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
return (ctx != NULL && ctx->input[0] != 0);
}
#endif
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
mi_assert_internal(mi_random_is_initialized(ctx));
mi_assert_internal(ctx != ctx_new);
chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
}
uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
mi_assert_internal(mi_random_is_initialized(ctx));
#if MI_INTPTR_SIZE <= 4
return chacha_next32(ctx);
#elif MI_INTPTR_SIZE == 8
return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
#else
# error "define mi_random_next for this platform"
#endif
}
/* ----------------------------------------------------------------------------
To initialize a fresh random context we rely on the OS:
- Windows : BCryptGenRandom
- osX,bsd,wasi: arc4random_buf
- Linux : getrandom,/dev/urandom
If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
-----------------------------------------------------------------------------*/
#if defined(_WIN32)
#pragma comment (lib,"bcrypt.lib")
#include <bcrypt.h>
static bool os_random_buf(void* buf, size_t buf_len) {
return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
/*
#define SystemFunction036 NTAPI SystemFunction036
#include <NTSecAPI.h>
#undef SystemFunction036
static bool os_random_buf(void* buf, size_t buf_len) {
RtlGenRandom(buf, (ULONG)buf_len);
return true;
}
*/
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__wasi__)
#include <stdlib.h>
static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__linux__)
#include <sys/syscall.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
static bool os_random_buf(void* buf, size_t buf_len) {
// Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
// and for the latter the actual `getrandom` call is not always defined.
// (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
// We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
#ifdef SYS_getrandom
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK (1)
#endif
static volatile _Atomic(uintptr_t) no_getrandom; // = 0
if (mi_atomic_read(&no_getrandom)==0) {
ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
if (ret >= 0) return (buf_len == (size_t)ret);
if (errno != ENOSYS) return false;
mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom
}
#endif
int flags = O_RDONLY;
#if defined(O_CLOEXEC)
flags |= O_CLOEXEC;
#endif
int fd = open("/dev/urandom", flags, 0);
if (fd < 0) return false;
size_t count = 0;
while(count < buf_len) {
ssize_t ret = read(fd, (char*)buf + count, buf_len - count);
if (ret<=0) {
if (errno!=EAGAIN && errno!=EINTR) break;
}
else {
count += ret;
}
}
close(fd);
return (count==buf_len);
}
#else
static bool os_random_buf(void* buf, size_t buf_len) {
return false;
}
#endif
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif
static uintptr_t os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
x ^= (uintptr_t)mach_absolute_time();
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
mi_assert_internal(x != 0);
return x;
}
void _mi_random_init(mi_random_ctx_t* ctx) {
uint8_t key[32];
if (!os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
}
}
chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
}
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
for (size_t i = 0; i < n; i++) {
if (x[i] != y[i]) return false;
}
return true;
}
static void chacha_test(void)
{
uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
qround(x, 0, 1, 2, 3);
mi_assert_internal(array_equals(x, x_out, 4));
uint32_t y[16] = {
0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
uint32_t y_out[16] = {
0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
qround(y, 2, 7, 8, 13);
mi_assert_internal(array_equals(y, y_out, 16));
mi_random_ctx_t r = {
{ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
{0},
0
};
uint32_t r_out[16] = {
0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
chacha_block(&r);
mi_assert_internal(array_equals(r.output, r_out, 16));
}
*/

View File

@ -310,13 +310,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_slices, mi_segments_tld
return segment;
}
static bool mi_segment_cache_full(mi_segments_tld_t* tld)
static bool mi_segment_cache_full(mi_segments_tld_t* tld)
{
// if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread
size_t max_cache = mi_option_get(mi_option_segment_cache);
if (tld->cache_count < max_cache
&& tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache
) {
) {
return false;
}
// take the opportunity to reduce the segment cache if it is too large (now)
@ -660,6 +660,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
size_t memid = 0;
// segment = (mi_segment_t*)_mi_os_alloc_aligned(segment_size, MI_SEGMENT_SIZE, commit, &mem_large, os_tld);
segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld);
if (segment == NULL) return NULL; // failed to allocate
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
if (!commit) {
@ -669,6 +670,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, m
}
segment->memid = memid;
segment->mem_is_fixed = mem_large;
segment->mem_is_committed = mi_option_is_enabled(mi_option_eager_commit); // commit;
mi_segments_track_size((long)(segment_size), tld);
mi_segment_map_allocated_at(segment);
@ -813,7 +815,6 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld
_mi_os_reset(start, psize, tld->stats);
}
// zero the page data, but not the segment fields
page->is_zero_init = false;
ptrdiff_t ofs = offsetof(mi_page_t, capacity);
@ -854,7 +855,28 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
// are "abandoned" and will be reclaimed by other threads to
// reuse their pages and/or free them eventually
static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments
// prepend a list of abandoned segments atomically to the global abandoned list; O(n)
static void mi_segments_prepend_abandoned(mi_segment_t* first) {
if (first == NULL) return;
// first try if the abandoned list happens to be NULL
if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return;
// if not, find the end of the list
mi_segment_t* last = first;
while (last->abandoned_next != NULL) {
last = last->abandoned_next;
}
// and atomically prepend
mi_segment_t* next;
do {
next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned));
last->abandoned_next = next;
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next));
}
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
@ -878,16 +900,13 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
// force delayed decommits instead?
mi_segment_delayed_decommit(segment, false, tld->stats);
// add it to the abandoned list
// all pages in the segment are abandoned; add it to the abandoned list
_mi_stat_increase(&tld->stats->segments_abandoned, 1);
mi_segments_track_size(-((long)mi_segment_size(segment)), tld);
segment->thread_id = 0;
mi_segment_t* next;
do {
next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned));
mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next));
mi_atomic_increment(&abandoned_count);
segment->abandoned_next = NULL;
mi_segments_prepend_abandoned(segment); // prepend one-element list
mi_atomic_increment(&abandoned_count); // keep approximate count
}
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
@ -904,24 +923,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
}
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
uintptr_t reclaimed = 0;
uintptr_t atmost;
if (try_all) {
atmost = abandoned_count+16; // close enough
}
else {
atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated)
if (atmost < 2) atmost = 2; // but at least 2
// To avoid the A-B-A problem, grab the entire list atomically
mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations
if (segment == NULL) return false;
segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL);
if (segment == NULL) return false;
// we got a non-empty list
if (!try_all) {
// take at most 1/8th of the list and prepend the rest back onto the abandoned list;
// this is O(n) but simplifies the code a lot (there is no A-B-A problem) and is
// probably fine since the list tends to stay short (see the standalone sketch after this function)
uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated)
if (atmost < 8) atmost = 8; // but at least 8
// find the split point
mi_segment_t* last = segment;
while (last->abandoned_next != NULL && atmost > 0) {
last = last->abandoned_next;
atmost--;
}
// split the list and push back the remaining segments
mi_segment_t* next = last->abandoned_next;
last->abandoned_next = NULL;
mi_segments_prepend_abandoned(next);
}
// for `atmost` `reclaimed` abandoned segments...
while(atmost > reclaimed) {
// try to claim the head of the abandoned segments
mi_segment_t* segment;
do {
segment = (mi_segment_t*)abandoned;
} while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment));
if (segment==NULL) break; // stop early if no more segments available
// reclaim all segments that we kept
while(segment != NULL) {
mi_segment_t* const next = segment->abandoned_next; // save the next segment
// got it.
mi_atomic_decrement(&abandoned_count);
@ -962,7 +992,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
slice = mi_segment_page_clear(page, tld); // set slice again due to coalescing
}
else {
// otherwise reclaim it
// otherwise reclaim it
_mi_page_reclaim(heap,page);
}
}
@ -974,11 +1004,12 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
if (segment->used == 0) { // due to page_clear
mi_segment_free(segment,false,tld);
}
else {
reclaimed++;
}
// go on
segment = next;
}
return (reclaimed>0);
return true;
}
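The reclaim logic above relies on one pattern: atomically exchange the whole list head with NULL (so there is no A-B-A window), keep a prefix locally, and prepend the remainder back. A self-contained sketch of just that pattern, using a plain node type and C11 atomics instead of mimalloc's atomic wrappers:

#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s { struct node_s* next; } node_t;
static _Atomic(node_t*) shared_list;              // plays the role of `abandoned`

// prepend a whole (possibly multi-element) list; O(n) to find the tail
void list_prepend_all(node_t* first) {
  if (first == NULL) return;
  node_t* last = first;
  while (last->next != NULL) last = last->next;
  node_t* head = atomic_load(&shared_list);
  do {
    last->next = head;                            // link our tail to the current head
  } while (!atomic_compare_exchange_weak(&shared_list, &head, first));
}

// take the whole list, keep roughly `keep` nodes, and push the rest back
node_t* list_take_some(size_t keep) {
  node_t* taken = atomic_exchange(&shared_list, NULL);  // grab everything at once
  if (taken == NULL) return NULL;
  node_t* last = taken;
  while (last->next != NULL && keep > 1) { last = last->next; keep--; }
  node_t* rest = last->next;                      // split off the remainder
  last->next = NULL;
  list_prepend_all(rest);                         // give it back to other threads
  return taken;                                   // caller now owns this sublist
}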

View File

@ -14,8 +14,14 @@ terms of the MIT license. A copy of the license can be found in the file
// it will override all the standard library allocation
// functions (on Unix's).
#include "stats.c"
#include "random.c"
#include "os.c"
#include "arena.c"
//#include "memory.c"
#include "segment.c"
#include "page.c"
#include "heap.c"

View File

@ -26,8 +26,8 @@ terms of the MIT license.
//
// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
static int ITER = 10; // N full iterations destructing and re-creating all threads
static int SCALE = 10; // scaling factor
static int ITER = 50; // N full iterations destructing and re-creating all threads
// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor
@ -135,9 +135,9 @@ static void stress(intptr_t tid) {
allocs--;
if (data_top >= data_size) {
data_size += 100000;
data = (void**)custom_realloc(data, data_size * sizeof(void*));
data = (void**)custom_realloc(data, data_size * sizeof(void*));
}
data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
}
else {
// 25% retain
@ -209,7 +209,7 @@ int main(int argc, char** argv) {
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - n + 1); }
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
}