Merge branch 'dev-random' into dev

This commit is contained in:
daan 2019-12-28 16:19:45 -08:00
commit 384808e340
19 changed files with 473 additions and 158 deletions

View File

@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake")
set(mi_sources
src/stats.c
src/random.c
src/os.c
src/arena.c
src/memory.c
@ -115,7 +116,7 @@ endif()
# extra needed libraries
if(WIN32)
list(APPEND mi_libraries psapi shell32 user32)
list(APPEND mi_libraries psapi shell32 user32 bcrypt)
else()
list(APPEND mi_libraries pthread)
find_library(LIBRT rt)

View File

@ -129,7 +129,7 @@
<CompileAs>Default</CompileAs>
</ClCompile>
<Link>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<IgnoreSpecificDefaultLibraries>
</IgnoreSpecificDefaultLibraries>
<ModuleDefinitionFile>
@ -195,7 +195,7 @@
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<ModuleDefinitionFile>
</ModuleDefinitionFile>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
@ -244,6 +244,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

View File

@ -73,5 +73,8 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -229,6 +229,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

View File

@ -56,6 +56,9 @@
<ClCompile Include="..\..\src\arena.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -247,6 +247,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>

View File

@ -46,6 +46,9 @@
<ClCompile Include="..\..\src\bitmap.inc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -232,6 +232,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />

View File

@ -49,6 +49,9 @@
<ClCompile Include="..\..\src\bitmap.inc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\random.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View File

@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...);
void _mi_options_init(void);
void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn;
// "init.c"
// random.c
void _mi_random_init(mi_random_ctx_t* ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);
// init.c
extern mi_stats_t _mi_stats_main;
extern const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
uintptr_t _mi_random_shuffle(uintptr_t x);
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
bool _mi_preloading(); // true while the C runtime is not ready
// os.c
@ -100,7 +105,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i
// "heap.c"
void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
uintptr_t _mi_heap_random(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
// "stats.c"
@ -388,12 +392,30 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
}
// -------------------------------------------------------------------
// Encoding/Decoding the free list next pointers
// Note: we pass a `null` value to be used as the `NULL` value for the
// end of a free list. This is to prevent the cookie itself to ever
// be present among user blocks (as `cookie^0==cookie`).
// -------------------------------------------------------------------
/* -------------------------------------------------------------------
Encoding/Decoding the free list next pointers
This is to protect against buffer overflow exploits where the
free list is mutated. Many hardened allocators xor the next pointer `p`
with a secret key `k1`, as `p^k1`. This prevents overwriting with known
values but might be still too weak: if the attacker can guess
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
Moreover, if multiple blocks can be read as well, the attacker can
xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
about the pointers (and subsequently `k1`).
Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
Since these operations are not associative, the above approaches do not
work so well any more even if the `p` can be guesstimated. For example,
for the read case we can subtract two entries to discard the `+k1` term,
but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
We include the left-rotation since xor and addition are otherwise linear
in the lowest bit. Finally, both keys are unique per page which reduces
the re-use of keys by a large factor.
We also pass a separate `null` value to be used as `NULL` or otherwise
`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
------------------------------------------------------------------- */
static inline bool mi_is_in_same_segment(const void* p, const void* q) {
return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
@ -408,52 +430,84 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) {
return (idxp == idxq);
}
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) {
static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
shift %= MI_INTPTR_BITS;
return ((x << shift) | (x >> (MI_INTPTR_BITS - shift)));
}
static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
shift %= MI_INTPTR_BITS;
return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
}
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) {
#ifdef MI_ENCODE_FREELIST
mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2);
if (mi_unlikely((void*)b==null)) { b = NULL; }
return b;
#else
UNUSED(cookie); UNUSED(null);
UNUSED(key1); UNUSED(key2); UNUSED(null);
return (mi_block_t*)block->next;
#endif
}
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) {
#ifdef MI_ENCODE_FREELIST
if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
block->next = (mi_encoded_t)next ^ cookie;
block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1;
#else
UNUSED(cookie); UNUSED(null);
UNUSED(key1); UNUSED(key2); UNUSED(null);
block->next = (mi_encoded_t)next;
#endif
}
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
#ifdef MI_ENCODE_FREELIST
mi_block_t* next = mi_block_nextx(page,block,page->cookie);
// check for free list corruption: is `next` at least in our segment range?
mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]);
// check for free list corruption: is `next` at least in the same page?
// TODO: check if `next` is `page->block_size` aligned?
if (next!=NULL && !mi_is_in_same_page(block, next)) {
if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
_mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
next = NULL;
}
return next;
#else
UNUSED(page);
return mi_block_nextx(page,block,0);
return mi_block_nextx(page,block,0,0);
#endif
}
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
#ifdef MI_ENCODE_FREELIST
mi_block_set_nextx(page,block,next, page->cookie);
mi_block_set_nextx(page,block,next, page->key[0], page->key[1]);
#else
UNUSED(page);
mi_block_set_nextx(page,block, next,0);
mi_block_set_nextx(page,block, next,0,0);
#endif
}
// -------------------------------------------------------------------
// Fast "random" shuffle
// -------------------------------------------------------------------
static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros
#if (MI_INTPTR_SIZE==8)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
return x;
}
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)

View File

@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
#if !defined(MI_SECURE)
#define MI_SECURE 0
#define MI_SECURE 4
#endif
// Define MI_DEBUG for debug mode
@ -76,6 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define KiB ((size_t)1024)
#define MiB (KiB*KiB)
@ -190,7 +191,7 @@ typedef struct mi_page_s {
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#ifdef MI_ENCODE_FREELIST
uintptr_t cookie; // random cookie to encode the free lists
uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`)
#endif
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
@ -205,9 +206,9 @@ typedef struct mi_page_s {
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// improve page index calculation
// without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
#if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST))
void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain
// without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words
#if (MI_INTPTR_SIZE==4)
void* padding[1]; // 12/14 words on 32-bit plain
#endif
} mi_page_t;
@ -238,7 +239,7 @@ typedef struct mi_segment_s {
size_t capacity; // count of available pages (`#free + used`)
size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE`
size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages.
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
// layout like this to optimize access in `mi_free`
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
@ -273,6 +274,14 @@ typedef struct mi_page_queue_s {
#define MI_BIN_FULL (MI_BIN_HUGE+1)
// Random context
typedef struct mi_random_cxt_s {
uint32_t input[16];
uint32_t output[16];
int output_available;
} mi_random_ctx_t;
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
@ -280,8 +289,9 @@ struct mi_heap_s {
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
volatile _Atomic(mi_block_t*) thread_delayed_free;
uintptr_t thread_id; // thread this heap belongs too
uintptr_t cookie;
uintptr_t random; // random number used for secure allocation
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list
mi_random_ctx_t random; // random number context used for secure allocation
size_t page_count; // total number of pages in the `pages` queues.
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
};

View File

@ -140,28 +140,24 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons
}
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) {
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) {
// Suspicious: the decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block))
{
_mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
return true;
}
// The decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block))
{
_mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
return true;
}
return false;
}
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL?
mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
{
// Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free?
// Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free?
// (continue in separate function to improve code generation)
return mi_check_is_double_freex(page, block, n);
}
@ -242,7 +238,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap,block,dfree, heap->cookie);
mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}
@ -266,7 +262,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
// and push it on the free list
if (mi_likely(local)) {
// owning thread can free a block directly
if (mi_check_is_double_free(page, block)) return;
if (mi_unlikely(mi_check_is_double_free(page, block))) return;
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
@ -341,7 +337,7 @@ void mi_free(void* p) mi_attr_noexcept
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
if (mi_check_is_double_free(page,block)) return;
if (mi_unlikely(mi_check_is_double_free(page,block))) return;
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;

View File

@ -184,12 +184,6 @@ mi_heap_t* mi_heap_get_backing(void) {
return bheap;
}
uintptr_t _mi_heap_random(mi_heap_t* heap) {
uintptr_t r = heap->random;
heap->random = _mi_random_shuffle(r);
return r;
}
mi_heap_t* mi_heap_new(void) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);
@ -197,12 +191,18 @@ mi_heap_t* mi_heap_new(void) {
memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = bheap->tld;
heap->thread_id = _mi_thread_id();
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1;
heap->random = _mi_heap_random(bheap);
_mi_random_split(&bheap->random, &heap->random);
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
return heap;
}
uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
return _mi_random_next(&heap->random);
}
// zero out the page queues
static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_initialized(heap));

View File

@ -16,13 +16,13 @@ const mi_page_t _mi_page_empty = {
{ 0 }, false,
NULL, // free
#if MI_ENCODE_FREELIST
0,
{ 0, 0 },
#endif
0, // used
NULL,
ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0),
0, NULL, NULL, NULL
#if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST))
#if (MI_INTPTR_SIZE==4)
, { NULL } // padding
#endif
};
@ -83,9 +83,10 @@ const mi_heap_t _mi_heap_empty = {
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL),
0,
0,
0,
0, // tid
0, // cookie
{ 0, 0 }, // keys
{ {0}, {0}, 0 },
0,
false
};
@ -105,18 +106,21 @@ static mi_tld_t tld_main = {
{ MI_STATS_NULL } // stats
};
#if MI_INTPTR_SIZE==8
#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL)
#else
#define MI_INIT_COOKIE (0xCDCDCDCDUL)
#endif
mi_heap_t _mi_heap_main = {
&tld_main,
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
NULL,
0, // thread id
#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!)
0xCDCDCDCDCDCDCDCDUL,
#else
0xCDCDCDCDUL,
#endif
0, // random
0, // thread id
MI_INIT_COOKIE, // initial cookie
{ MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0}, {0}, 0 }, // random
0, // page count
false // can reclaim
};
@ -125,66 +129,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { MI_STATS_NULL };
/* -----------------------------------------------------------
Initialization of random numbers
----------------------------------------------------------- */
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif
uintptr_t _mi_random_shuffle(uintptr_t x) {
#if (MI_INTPTR_SIZE==8)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
return x;
}
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse
uintptr_t x;
arc4random_buf(&x, sizeof x);
#else
// Hopefully, ASLR makes our function address random
uintptr_t x = (uintptr_t)((void*)&_mi_random_init);
x ^= seed;
// xor with high res time
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
x ^= (uintptr_t)mach_absolute_time();
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
#endif
return x;
}
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
@ -214,8 +158,10 @@ static bool _mi_heap_init(void) {
mi_heap_t* heap = &td->heap;
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id();
heap->random = _mi_random_init(heap->thread_id);
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1;
_mi_random_init(&heap->random);
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->tld = tld;
memset(tld, 0, sizeof(*tld));
tld->heap_backing = heap;
@ -451,16 +397,17 @@ void mi_process_init(void) mi_attr_noexcept {
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_heap_t* h = mi_get_default_heap();
mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h;
#ifndef __APPLE__
_mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random;
_mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
#endif
_mi_heap_main.random = _mi_random_shuffle(random);
mi_process_setup_auto_thread_done();
_mi_os_init();
#if (MI_DEBUG)

View File

@ -407,8 +407,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages)
intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint);
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif
mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size);
hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
@ -907,8 +907,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
// Initialize the start address after the 32TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages);
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB
#endif
}
end = start + size;

View File

@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(mi_page_is_valid_init(page));
#if MI_SECURE
mi_assert_internal(page->cookie != 0);
mi_assert_internal(page->key != 0);
#endif
if (page->heap!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
@ -284,7 +284,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap,block, heap->cookie);
mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
@ -292,9 +292,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap, block, dfree, heap->cookie);
mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}
block = next;
}
@ -357,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
#if MI_DEBUG>1
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) {
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
@ -475,11 +474,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
// and initialize the free list by randomly threading through them
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
const uintptr_t r = _mi_heap_random_next(heap);
size_t current = r % slice_count;
counts[current]--;
mi_block_t* const free_start = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
// and iterate through the rest; use `random_shuffle` for performance
uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
const size_t round = i%MI_INTPTR_SIZE;
@ -500,7 +500,6 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
// prepend to the free list (usually NULL)
mi_block_set_next(page, blocks[current], page->free); // end of the list
page->free = free_start;
heap->random = _mi_random_shuffle(rnd);
}
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats)
@ -608,7 +607,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
#ifdef MI_ENCODE_FREELIST
page->cookie = _mi_heap_random(heap) | 1;
page->key[0] = _mi_heap_random_next(heap);
page->key[1] = _mi_heap_random_next(heap);
#endif
page->is_zero = page->is_zero_init;
@ -621,7 +621,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->prev == NULL);
mi_assert_internal(!mi_page_has_aligned(page));
#if (MI_ENCODE_FREELIST)
mi_assert_internal(page->cookie != 0);
mi_assert_internal(page->key != 0);
#endif
mi_assert_expensive(mi_page_is_valid_init(page));
@ -710,7 +710,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap,size);
mi_page_t* page = pq->first;
if (page != NULL) {
if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) {
if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
// in secure mode, we extend half the time to increase randomness
mi_page_extend_free(heap, page, heap->tld);
mi_assert_internal(mi_page_immediate_available(page));

290
src/random.c Normal file
View File

@ -0,0 +1,290 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <string.h> // memset
/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
and to avoid implementations that use a lock. We only use the OS provided
random source to initialize the initial seeds. Since we do not need ultimate
performance but we do rely on the security (for secret cookies in secure mode)
we use a cryptographically secure generator (chacha20).
-----------------------------------------------------------------------------*/
#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?
/* ----------------------------------------------------------------------------
Chacha20 implementation as the original algorithm with a 64-bit nonce
and counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position 0 to 3: constant key
Position 4 to 11: the key
Position 12 to 13: the counter.
Position 14 to 15: the nonce.
The implementation uses regular C code which compiles very well on modern compilers.
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
return (x << shift) | (x >> (32 - shift));
}
static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
{
// scramble into `x`
uint32_t x[16];
for (size_t i = 0; i < 16; i++) {
x[i] = ctx->input[i];
}
for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
qround(x, 0, 4, 8, 12);
qround(x, 1, 5, 9, 13);
qround(x, 2, 6, 10, 14);
qround(x, 3, 7, 11, 15);
qround(x, 0, 5, 10, 15);
qround(x, 1, 6, 11, 12);
qround(x, 2, 7, 8, 13);
qround(x, 3, 4, 9, 14);
}
// add scrambled data to the initial state
for (size_t i = 0; i < 16; i++) {
ctx->output[i] = x[i] + ctx->input[i];
}
ctx->output_available = 16;
// increment the counter for the next round
ctx->input[12] += 1;
if (ctx->input[12] == 0) {
ctx->input[13] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
}
}
}
static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
if (ctx->output_available <= 0) {
chacha_block(ctx);
ctx->output_available = 16; // (assign again to suppress static analysis warning)
}
const uint32_t x = ctx->output[16 - ctx->output_available];
ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
ctx->output_available--;
return x;
}
static inline uint32_t read32(const uint8_t* p, size_t idx32) {
const size_t i = 4*idx32;
return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian but we do anyways
// just for being compatible :-)
memset(ctx, 0, sizeof(*ctx));
for (size_t i = 0; i < 4; i++) {
const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
ctx->input[i] = read32(sigma,i);
}
for (size_t i = 0; i < 8; i++) {
ctx->input[i + 4] = read32(key,i);
}
ctx->input[12] = 0;
ctx->input[13] = 0;
ctx->input[14] = (uint32_t)nonce;
ctx->input[15] = (uint32_t)(nonce >> 32);
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
ctx_new->input[14] = (uint32_t)nonce;
ctx_new->input[15] = (uint32_t)(nonce >> 32);
mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
chacha_block(ctx_new);
}
/* ----------------------------------------------------------------------------
Random interface
-----------------------------------------------------------------------------*/
#if MI_DEBUG>1
static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
return (ctx != NULL && ctx->input[0] != 0);
}
#endif
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
mi_assert_internal(mi_random_is_initialized(ctx));
mi_assert_internal(ctx != ctx_new);
chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
}
uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
mi_assert_internal(mi_random_is_initialized(ctx));
#if MI_INTPTR_SIZE <= 4
return chacha_next32(ctx);
#elif MI_INTPTR_SIZE == 8
return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
#else
# error "define mi_random_next for this platform"
#endif
}
/* ----------------------------------------------------------------------------
To initialize a fresh random context we rely on the OS:
- windows: BCryptGenRandom
- bsd,wasi: arc4random_buf
- linux: getrandom
If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
-----------------------------------------------------------------------------*/
#if defined(_WIN32)
#pragma comment (lib,"bcrypt.lib")
#include <bcrypt.h>
static bool os_random_buf(void* buf, size_t buf_len) {
return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
/*
#define SystemFunction036 NTAPI SystemFunction036
#include <NTSecAPI.h>
#undef SystemFunction036
static bool os_random_buf(void* buf, size_t buf_len) {
RtlGenRandom(buf, (ULONG)buf_len);
return true;
}
*/
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__wasi__)
#include <stdlib.h>
static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__linux__)
#include <sys/random.h>
static bool os_random_buf(void* buf, size_t buf_len) {
return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len);
}
#else
static bool os_random_buf(void* buf, size_t buf_len) {
return false;
}
#endif
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif
static uintptr_t os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
x ^= (uintptr_t)mach_absolute_time();
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
mi_assert_internal(x != 0);
return x;
}
void _mi_random_init(mi_random_ctx_t* ctx) {
uint8_t key[32];
if (!os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
}
}
chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
}
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
for (size_t i = 0; i < n; i++) {
if (x[i] != y[i]) return false;
}
return true;
}
static void chacha_test(void)
{
uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
qround(x, 0, 1, 2, 3);
mi_assert_internal(array_equals(x, x_out, 4));
uint32_t y[16] = {
0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
uint32_t y_out[16] = {
0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
qround(y, 2, 7, 8, 13);
mi_assert_internal(array_equals(y, y_out, 16));
mi_random_ctx_t r = {
{ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
{0},
0
};
uint32_t r_out[16] = {
0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
chacha_block(&r);
mi_assert_internal(array_equals(r.output, r_out, 16));
}
*/

View File

@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
// it will override all the standard library allocation
// functions (on Unix's).
#include "stats.c"
#include "random.c"
#include "os.c"
#include "arena.c"
#include "memory.c"