From ba87a39d9fcfab97fce28c16c7e1c799ee6af524 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 22 Dec 2019 17:07:01 -0800 Subject: [PATCH 1/5] updated random cookie generation using OS primitives and chacha20 --- CMakeLists.txt | 3 +- ide/vs2017/mimalloc-override.vcxproj | 5 +- ide/vs2017/mimalloc-override.vcxproj.filters | 3 + ide/vs2017/mimalloc.vcxproj | 1 + ide/vs2017/mimalloc.vcxproj.filters | 3 + ide/vs2019/mimalloc-override.vcxproj | 1 + ide/vs2019/mimalloc-override.vcxproj.filters | 3 + ide/vs2019/mimalloc.vcxproj | 1 + ide/vs2019/mimalloc.vcxproj.filters | 3 + include/mimalloc-internal.h | 35 ++- include/mimalloc-types.h | 11 +- src/heap.c | 14 +- src/init.c | 77 +---- src/memory.c | 2 +- src/os.c | 8 +- src/page.c | 14 +- src/random.c | 290 +++++++++++++++++++ src/static.c | 1 + 18 files changed, 378 insertions(+), 97 deletions(-) create mode 100644 src/random.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c4480b89..a894de9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ include("cmake/mimalloc-config-version.cmake") set(mi_sources src/stats.c + src/random.c src/os.c src/arena.c src/memory.c @@ -115,7 +116,7 @@ endif() # extra needed libraries if(WIN32) - list(APPEND mi_libraries psapi shell32 user32) + list(APPEND mi_libraries psapi shell32 user32 bcrypt) else() list(APPEND mi_libraries pthread) find_library(LIBRT rt) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 1fc70b33..821645e9 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -129,7 +129,7 @@ Default - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) @@ -195,7 +195,7 @@ true true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) Default @@ -244,6 +244,7 @@ true + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 75a8e032..037fbcbb 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -73,5 +73,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 484c4db8..01c6ad27 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -229,6 +229,7 @@ true + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 598b8643..5fe74aa0 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -56,6 +56,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 49f3d213..6ac6541d 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -247,6 +247,7 @@ true + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index b2dea4e1..a8c5a5de 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index bae49bab..1860f26a 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -232,6 +232,7 @@ true + diff --git a/ide/vs2019/mimalloc.vcxproj.filters 
b/ide/vs2019/mimalloc.vcxproj.filters index 0cce0c4f..61de4afe 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -49,6 +49,9 @@ Source Files + + Source Files + diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 99e4b5ba..e648c1ff 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...); void _mi_options_init(void); void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn; -// "init.c" +// random.c +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +static inline uintptr_t _mi_random_shuffle(uintptr_t x); + +// init.c extern mi_stats_t _mi_stats_main; extern const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -uintptr_t _mi_random_shuffle(uintptr_t x); -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */); bool _mi_preloading(); // true while the C runtime is not ready // os.c @@ -100,7 +105,6 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); -uintptr_t _mi_heap_random(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); // "stats.c" @@ -454,6 +458,29 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #endif } +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + mi_assert_internal(x!=0); +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} // ------------------------------------------------------------------- // Optimize numa node access for the common case (= one node) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index f79c5a64..1360c125 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -76,6 +76,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #endif #define MI_INTPTR_SIZE (1<random; - heap->random = _mi_random_shuffle(r); - return r; -} - mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); @@ -197,12 +191,16 @@ mi_heap_t* mi_heap_new(void) { memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; - heap->random = _mi_heap_random(bheap); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/src/init.c b/src/init.c index d8fff823..768bc2bf 100644 --- a/src/init.c +++ b/src/init.c @@ -85,7 +85,7 @@ const mi_heap_t _mi_heap_empty = { ATOMIC_VAR_INIT(NULL), 0, 0, - 0, + { {0}, {0}, 0 }, 0, false }; @@ -116,7 +116,7 @@ mi_heap_t _mi_heap_main = { #else 0xCDCDCDCDUL, #endif - 0, // random + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -125,66 +125,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; -/* ----------------------------------------------------------- - Initialization of random numbers ------------------------------------------------------------ */ - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - -uintptr_t _mi_random_shuffle(uintptr_t x) { - #if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; - #elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; - #endif - return x; -} - -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { -#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse - uintptr_t x; - arc4random_buf(&x, sizeof x); -#else - // Hopefully, ASLR makes our function address random - uintptr_t x = (uintptr_t)((void*)&_mi_random_init); - x ^= seed; - // xor with high res time -#if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); -#elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); -#else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; -#endif - // and do a few randomization steps - uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { - x = _mi_random_shuffle(x); - } -#endif - return x; -} /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps @@ -214,8 +154,8 @@ static bool _mi_heap_init(void) { mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - heap->random = _mi_random_init(heap->thread_id); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; 
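The pieces added so far compose as follows: _mi_random_init keys a per-context ChaCha20 stream from the OS, _mi_random_next (and the _mi_heap_random_next wrapper) draws one uintptr_t at a time, and _mi_random_split derives a child context that reuses the key but gets a fresh nonce. Below is a minimal sketch of that flow, using only the declarations added to mimalloc-internal.h above; it assumes it is compiled inside the mimalloc source tree and elides the real heap wiring:

#include "mimalloc-internal.h"   // mi_random_ctx_t and the _mi_random_* declarations above

static void example_random_usage(void) {
  mi_random_ctx_t backing;
  _mi_random_init(&backing);                          // ChaCha20 context keyed from the OS random source
  uintptr_t cookie = _mi_random_next(&backing) | 1;   // cookies are kept odd, as in _mi_heap_init above

  mi_random_ctx_t child;
  _mi_random_split(&backing, &child);                 // same key, fresh nonce (the child context's address)
  uintptr_t child_cookie = _mi_random_next(&child) | 1;
  (void)cookie; (void)child_cookie;
}
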
@@ -451,16 +391,15 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. - mi_heap_t* h = mi_get_default_heap(); + mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; - #ifndef __APPLE__ - _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + _mi_random_init(&_mi_heap_main.random); + #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); #endif - _mi_heap_main.random = _mi_random_shuffle(random); mi_process_setup_auto_thread_done(); _mi_os_init(); #if (MI_DEBUG) diff --git a/src/memory.c b/src/memory.c index 9505c98f..3d6a22f5 100644 --- a/src/memory.c +++ b/src/memory.c @@ -79,7 +79,7 @@ typedef union mi_region_info_u { struct { bool valid; bool is_large; - int numa_node; + short numa_node; }; } mi_region_info_t; diff --git a/src/os.c b/src/os.c index 6cf89c99..9da209ad 100644 --- a/src/os.c +++ b/src/os.c @@ -409,8 +409,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); - init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all @@ -909,8 +909,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_random_init((uintptr_t)&mi_os_claim_huge_pages); - start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB + uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB #endif } end = start + size; diff --git a/src/page.c b/src/page.c index 2992bf09..471dca97 100644 --- a/src/page.c +++ b/src/page.c @@ -475,11 +475,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co // and initialize the free list by randomly threading through them // set up first element - size_t current = _mi_heap_random(heap) % slice_count; + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; counts[current]--; mi_block_t* const free_start = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; + // and iterate through the rest; use 
`random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r); for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; @@ -499,8 +500,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co } // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list - page->free = free_start; - heap->random = _mi_random_shuffle(rnd); + page->free = free_start; } static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) @@ -608,7 +608,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random(heap) | 1; + page->cookie = _mi_heap_random_next(heap) | 1; #endif page->is_zero = page->is_zero_init; @@ -710,7 +710,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); diff --git a/src/random.c b/src/random.c new file mode 100644 index 00000000..063633ff --- /dev/null +++ b/src/random.c @@ -0,0 +1,290 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#include "mimalloc.h" +#include "mimalloc-internal.h" + +#include // memset + +/* ---------------------------------------------------------------------------- +We use our own PRNG to keep predictable performance of random number generation +and to avoid implementations that use a lock. We only use the OS provided +random source to initialize the initial seeds. Since we do not need ultimate +performance but we do rely on the security (for secret cookies in secure mode) +we use a cryptographically secure generator (chacha20). +-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. + +The implementation uses regular C code which compiles very well on modern compilers. 
+(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* r) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = r->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + r->output[i] = x[i] + r->input[i]; + } + r->output_available = 16; + + // increment the counter for the next round + r->input[12] += 1; + if (r->input[12] == 0) { + r->input[13] += 1; + if (r->input[13] == 0) { // and keep increasing into the nonce + r->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* r) { + if (r->output_available <= 0) { + chacha_block(r); + r->output_available = 16; // (assign again to suppress static analysis warning) + } + r->output_available--; + const uint32_t x = r->output[r->output_available]; + r->output[r->output_available] = 0; // reset once the data is handed out + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); +} + +static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(r, 0, sizeof(*r)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + r->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + r->input[i + 4] = read32(key,i); + } + r->input[12] = 0; + r->input[13] = 0; + r->input[14] = (uint32_t)nonce; + r->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { + memset(init, 0, sizeof(*init)); + memcpy(init->input, r->input, sizeof(init->input)); + init->input[12] = 0; + init->input[13] = 0; + init->input[14] = (uint32_t)nonce; + init->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! 
+ chacha_block(init); +} + + +/* ---------------------------------------------------------------------------- +Random interface +-----------------------------------------------------------------------------*/ + +#if MI_DEBUG>1 +static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { + return (ctx != NULL && ctx->input[0] != 0); +} +#endif + +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + mi_assert_internal(ctx != new_ctx); + chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); +} + +uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { + mi_assert_internal(mi_random_is_initialized(ctx)); + #if MI_INTPTR_SIZE <= 4 + return chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif +} + + +/* ---------------------------------------------------------------------------- +To initialize a fresh random context we rely on the OS: +- windows: BCryptGenRandom +- bsd,wasi: arc4random_buf +- linux: getrandom +If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. +-----------------------------------------------------------------------------*/ + +#if defined(_WIN32) +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} +/* +#define SystemFunction036 NTAPI SystemFunction036 +#include +#undef SystemFunction036 +static bool os_random_buf(void* buf, size_t buf_len) { + RtlGenRandom(buf, (ULONG)buf_len); + return true; +} +*/ +#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__wasi__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} +#elif defined(__linux__) +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (getrandom(buf, buf_len, GRND_NONBLOCK) == (ssize_t)buf_len); +} +#else +static bool os_random_buf(void* buf, size_t buf_len) { + return false; +} +#endif + +#if defined(_WIN32) +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +static uintptr_t os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random + #if defined(_WIN32) + LARGE_INTEGER pcount; + QueryPerformanceCounter(&pcount); + x ^= (uintptr_t)(pcount.QuadPart); + #elif defined(__APPLE__) + x ^= (uintptr_t)mach_absolute_time(); + #else + struct timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + x ^= (uintptr_t)time.tv_sec; + x ^= (uintptr_t)time.tv_nsec; + #endif + // and do a few randomization steps + uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; + for (uintptr_t i = 0; i < max; i++) { + x = _mi_random_shuffle(x); + } + mi_assert_internal(x != 0); + return x; +} + +void _mi_random_init(mi_random_ctx_t* ctx) { + uint8_t key[32]; + if (!os_random_buf(key, sizeof(key))) { + // if we fail to get random data from the OS, we fall back to a + // weak random source based on the current time + uintptr_t x = os_random_weak(0); + for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. 
+ _mi_warning_message("unable to use secure randomness\n"); + x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ \ No newline at end of file diff --git a/src/static.c b/src/static.c index d31fca8f..0519453e 100644 --- a/src/static.c +++ b/src/static.c @@ -14,6 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // it will override all the standard library allocation // functions (on Unix's). 
#include "stats.c" +#include "random.c" #include "os.c" #include "arena.c" #include "memory.c" From ce02986d56cb69dd2f2d2b1a5c25260338665957 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 22:30:23 -0800 Subject: [PATCH 2/5] variable renaming --- src/random.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/random.c b/src/random.c index 063633ff..43e7dd5c 100644 --- a/src/random.c +++ b/src/random.c @@ -44,12 +44,12 @@ static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); } -static void chacha_block(mi_random_ctx_t* r) +static void chacha_block(mi_random_ctx_t* ctx) { // scramble into `x` uint32_t x[16]; for (size_t i = 0; i < 16; i++) { - x[i] = r->input[i]; + x[i] = ctx->input[i]; } for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { qround(x, 0, 4, 8, 12); @@ -64,28 +64,28 @@ static void chacha_block(mi_random_ctx_t* r) // add scrambled data to the initial state for (size_t i = 0; i < 16; i++) { - r->output[i] = x[i] + r->input[i]; + ctx->output[i] = x[i] + ctx->input[i]; } - r->output_available = 16; + ctx->output_available = 16; // increment the counter for the next round - r->input[12] += 1; - if (r->input[12] == 0) { - r->input[13] += 1; - if (r->input[13] == 0) { // and keep increasing into the nonce - r->input[14] += 1; + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; } } } -static uint32_t chacha_next32(mi_random_ctx_t* r) { - if (r->output_available <= 0) { - chacha_block(r); - r->output_available = 16; // (assign again to suppress static analysis warning) +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) } - r->output_available--; - const uint32_t x = r->output[r->output_available]; - r->output[r->output_available] = 0; // reset once the data is handed out + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; return x; } @@ -94,34 +94,34 @@ static inline uint32_t read32(const uint8_t* p, size_t idx32) { return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); } -static void chacha_init(mi_random_ctx_t* r, const uint8_t key[32], uint64_t nonce) +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) { // since we only use chacha for randomness (and not encryption) we // do not _need_ to read 32-bit values as little endian but we do anyways // just for being compatible :-) - memset(r, 0, sizeof(*r)); + memset(ctx, 0, sizeof(*ctx)); for (size_t i = 0; i < 4; i++) { const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; - r->input[i] = read32(sigma,i); + ctx->input[i] = read32(sigma,i); } for (size_t i = 0; i < 8; i++) { - r->input[i + 4] = read32(key,i); + ctx->input[i + 4] = read32(key,i); } - r->input[12] = 0; - r->input[13] = 0; - r->input[14] = (uint32_t)nonce; - r->input[15] = (uint32_t)(nonce >> 32); + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); } -static void chacha_split(mi_random_ctx_t* r, uint64_t nonce, mi_random_ctx_t* init) { - memset(init, 0, sizeof(*init)); - memcpy(init->input, r->input, 
sizeof(init->input)); - init->input[12] = 0; - init->input[13] = 0; - init->input[14] = (uint32_t)nonce; - init->input[15] = (uint32_t)(nonce >> 32); - mi_assert_internal(r->input[14] != init->input[14] || r->input[15] != init->input[15]); // do not reuse nonces! - chacha_block(init); +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces! + chacha_block(ctx_new); } @@ -135,10 +135,10 @@ static bool mi_random_is_initialized(mi_random_ctx_t* ctx) { } #endif -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx) { +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { mi_assert_internal(mi_random_is_initialized(ctx)); - mi_assert_internal(ctx != new_ctx); - chacha_split(ctx, (uintptr_t)new_ctx /*nonce*/, new_ctx); + mi_assert_internal(ctx != ctx_new); + chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new); } uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { From e3391d9a53c66f922c6e0ac12df4723701a05110 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 27 Dec 2019 23:33:50 -0800 Subject: [PATCH 3/5] stronger encoding of free lists using two keys per page --- include/mimalloc-internal.h | 58 +++++++++++++++++++++++++------------ include/mimalloc-types.h | 17 ++++++----- src/alloc.c | 8 ++--- src/heap.c | 2 ++ src/init.c | 30 ++++++++++++------- src/page.c | 14 ++++----- src/random.c | 2 +- src/segment.c | 2 +- 8 files changed, 83 insertions(+), 50 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e648c1ff..cdaac963 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -392,12 +392,28 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { } -// ------------------------------------------------------------------- -// Encoding/Decoding the free list next pointers -// Note: we pass a `null` value to be used as the `NULL` value for the -// end of a free list. This is to prevent the cookie itself to ever -// be present among user blocks (as `cookie^0==cookie`). -// ------------------------------------------------------------------- +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`, but if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). + +Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. +Since these operations are not associative, the above approaches do not +work so well any more even if the `p` can be guesstimated. (We include +the rotation since xor and addition are otherwise linear in the lowest bit) +Both keys are unique per page. + +We also pass a separate `null` value to be used as `NULL` or otherwise +`rotl(k2,13)^k1` would appear (too) often as a sentinel value. 
+------------------------------------------------------------------- */ + +#define MI_ENCODE_ROTATE_BITS (13) static inline bool mi_is_in_same_segment(const void* p, const void* q) { return (_mi_ptr_segment(p) == _mi_ptr_segment(q)); @@ -412,49 +428,55 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { return (idxp == idxq); } -static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) { +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { + return ((x << shift) | (x >> (MI_INTPTR_BITS - shift))); +} +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); +} +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(block->next ^ cookie); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); return (mi_block_t*)block->next; #endif } -static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) { +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = (mi_encoded_t)next ^ cookie; + block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; #else - UNUSED(cookie); UNUSED(null); + UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; #endif } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { #ifdef MI_ENCODE_FREELIST - mi_block_t* next = mi_block_nextx(page,block,page->cookie); - // check for free list corruption: is `next` at least in our segment range? + mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]); + // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? 
- if (next!=NULL && !mi_is_in_same_page(block, next)) { + if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); next = NULL; } return next; #else UNUSED(page); - return mi_block_nextx(page,block,0); + return mi_block_nextx(page,block,0,0); #endif } static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { #ifdef MI_ENCODE_FREELIST - mi_block_set_nextx(page,block,next, page->cookie); + mi_block_set_nextx(page,block,next, page->key[0], page->key[1]); #else UNUSED(page); - mi_block_set_nextx(page,block, next,0); + mi_block_set_nextx(page,block, next,0,0); #endif } diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 1360c125..ab7d7c53 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -191,7 +191,7 @@ typedef struct mi_page_s { mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #ifdef MI_ENCODE_FREELIST - uintptr_t cookie; // random cookie to encode the free lists + uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) @@ -206,9 +206,9 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) - void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain + // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words + #if (MI_INTPTR_SIZE==4) + void* padding[1]; // 12/14 words on 32-bit plain #endif } mi_page_t; @@ -239,8 +239,8 @@ typedef struct mi_segment_s { size_t capacity; // count of available pages (`#free + used`) size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` - + uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment @@ -289,8 +289,9 @@ struct mi_heap_s { mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") volatile _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs too - uintptr_t cookie; - mi_random_ctx_t random; // random number used for secure allocation + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. 
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages }; diff --git a/src/alloc.c b/src/alloc.c index e68b48d2..714acc76 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field + mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? { @@ -242,7 +242,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap,block,dfree, heap->cookie); + mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } @@ -266,7 +266,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // and push it on the free list if (mi_likely(local)) { // owning thread can free a block directly - if (mi_check_is_double_free(page, block)) return; + if (mi_unlikely(mi_check_is_double_free(page, block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; @@ -341,7 +341,7 @@ void mi_free(void* p) mi_attr_noexcept if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_check_is_double_free(page,block)) return; + if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; diff --git a/src/heap.c b/src/heap.c index 6d6948df..f90c4624 100644 --- a/src/heap.c +++ b/src/heap.c @@ -193,6 +193,8 @@ mi_heap_t* mi_heap_new(void) { heap->thread_id = _mi_thread_id(); _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } diff --git a/src/init.c b/src/init.c index 768bc2bf..cadcd2a3 100644 --- a/src/init.c +++ b/src/init.c @@ -16,13 +16,13 @@ const mi_page_t _mi_page_empty = { { 0 }, false, NULL, // free #if MI_ENCODE_FREELIST - 0, + { 0, 0 }, #endif 0, // used NULL, ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) + #if (MI_INTPTR_SIZE==4) , { NULL } // padding #endif }; @@ -83,8 +83,9 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, - 0, + 0, // tid + 0, // cookie + { 0, 0 }, // keys { {0}, {0}, 0 }, 0, false @@ -105,18 +106,21 @@ static mi_tld_t tld_main = { { MI_STATS_NULL } // stats }; +#if MI_INTPTR_SIZE==8 +#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) +#else +#define MI_INIT_COOKIE (0xCDCDCDCDUL) +#endif + mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, NULL, - 0, // 
thread id -#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) - 0xCDCDCDCDCDCDCDCDUL, -#else - 0xCDCDCDCDUL, -#endif - { {0}, {0}, 0 }, // random + 0, // thread id + MI_INIT_COOKIE, // initial cookie + { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0}, {0}, 0 }, // random 0, // page count false // can reclaim }; @@ -156,6 +160,8 @@ static bool _mi_heap_init(void) { heap->thread_id = _mi_thread_id(); _mi_random_init(&heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->tld = tld; memset(tld, 0, sizeof(*tld)); tld->heap_backing = heap; @@ -399,6 +405,8 @@ void mi_process_init(void) mi_attr_noexcept { _mi_random_init(&_mi_heap_main.random); #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); #endif mi_process_setup_auto_thread_done(); _mi_os_init(); diff --git a/src/page.c b/src/page.c index 471dca97..901fbda1 100644 --- a/src/page.c +++ b/src/page.c @@ -103,7 +103,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); @@ -284,7 +284,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet @@ -292,9 +292,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* dfree; do { dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap, block, dfree, heap->cookie); + mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); - } block = next; } @@ -357,7 +356,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #if MI_DEBUG>1 // check there are no references left.. 
- for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif @@ -608,7 +607,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random_next(heap) | 1; + page->key[0] = _mi_heap_random_next(heap); + page->key[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; @@ -621,7 +621,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); diff --git a/src/random.c b/src/random.c index 43e7dd5c..af6cd876 100644 --- a/src/random.c +++ b/src/random.c @@ -231,9 +231,9 @@ void _mi_random_init(mi_random_ctx_t* ctx) { if (!os_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time + _mi_warning_message("unable to use secure randomness\n"); uintptr_t x = os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. - _mi_warning_message("unable to use secure randomness\n"); x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } diff --git a/src/segment.c b/src/segment.c index f6ce939b..bbe88f82 100644 --- a/src/segment.c +++ b/src/segment.c @@ -520,7 +520,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); - segment->cookie = _mi_ptr_cookie(segment); + segment->cookie = _mi_ptr_cookie(segment); // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection From 77134e1ad072aa3bf3fd5e225f58ae88b48db589 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:17:49 -0800 Subject: [PATCH 4/5] update free list encoding to stronger formula with addition last --- include/mimalloc-internal.h | 29 +++++++++++++++++------------ src/page.c | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cdaac963..d41dfadc 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -397,24 +397,26 @@ Encoding/Decoding the free list next pointers This is to protect against buffer overflow exploits where the free list is mutated. Many hardened allocators xor the next pointer `p` -with a secret key `k1`, as `p^k1`, but if the attacker can guess +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). -Moreover, if multiple blocks can be read, the attacker can +Moreover, if multiple blocks can be read as well, the attacker can xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot about the pointers (and subsequently `k1`). -Instead mimalloc uses an extra key `k2` and encode as `rotl(p+k2,13)^k1`. 
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<> (MI_INTPTR_BITS - shift))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { + shift %= MI_INTPTR_BITS; return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); } + static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) { #ifdef MI_ENCODE_FREELIST - mi_block_t* b = (mi_block_t*)(mi_rotr(block->next ^ key1, MI_ENCODE_ROTATE_BITS) - key2); + mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2); if (mi_unlikely((void*)b==null)) { b = NULL; } return b; #else @@ -448,7 +453,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) { #ifdef MI_ENCODE_FREELIST if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; } - block->next = mi_rotl((mi_encoded_t)next + key2, MI_ENCODE_ROTATE_BITS) ^ key1; + block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1; #else UNUSED(key1); UNUSED(key2); UNUSED(null); block->next = (mi_encoded_t)next; @@ -485,7 +490,7 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c // ------------------------------------------------------------------- static inline uintptr_t _mi_random_shuffle(uintptr_t x) { - mi_assert_internal(x!=0); + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros #if (MI_INTPTR_SIZE==8) // by Sebastiano Vigna, see: x ^= x >> 30; diff --git a/src/page.c b/src/page.c index 901fbda1..b070e56a 100644 --- a/src/page.c +++ b/src/page.c @@ -479,7 +479,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co counts[current]--; mi_block_t* const free_start = blocks[current]; // and iterate through the rest; use `random_shuffle` for performance - uintptr_t rnd = _mi_random_shuffle(r); + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; From fc3e537bd4ac6d9ffec0243ec595ed15ca1649b8 Mon Sep 17 00:00:00 2001 From: daan Date: Sat, 28 Dec 2019 15:28:13 -0800 Subject: [PATCH 5/5] improve double free detection with faster same page check --- include/mimalloc-types.h | 2 +- src/alloc.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index ab7d7c53..76539bd6 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_SECURE 4 // checks for double free. (may be more expensive) #if !defined(MI_SECURE) -#define MI_SECURE 0 +#define MI_SECURE 4 #endif // Define MI_DEBUG for debug mode diff --git a/src/alloc.c b/src/alloc.c index 714acc76..82d97786 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -140,28 +140,24 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons } static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { - size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); - if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) { - // Suspicious: the decoded value is in the same page (or NULL). 
- // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) - { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); - return true; - } + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) + { + _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + return true; } return false; } static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { - // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free? + // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? // (continue in separate function to improve code generation) return mi_check_is_double_freex(page, block, n); }
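To make the final free-list encoding concrete, here is a small stand-alone sketch of the round trip implied by mi_block_set_nextx/mi_block_nextx after the last patch. The rotation helpers mirror mi_rotl/mi_rotr from the diff, while the key and pointer values are invented purely for illustration:

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define WORD_BITS ((uintptr_t)(8*sizeof(uintptr_t)))

static uintptr_t rotl(uintptr_t x, uintptr_t s) {
  s &= (WORD_BITS - 1);  // reduce the shift as mi_rotl does; the mask also keeps s==0 well-defined
  return (x << s) | (x >> ((WORD_BITS - s) & (WORD_BITS - 1)));
}
static uintptr_t rotr(uintptr_t x, uintptr_t s) {
  s &= (WORD_BITS - 1);
  return (x >> s) | (x << ((WORD_BITS - s) & (WORD_BITS - 1)));
}

// encode: rotl(p ^ k2, k1) + k1     decode: rotr(x - k1, k1) ^ k2
static uintptr_t encode(uintptr_t p, uintptr_t k1, uintptr_t k2) { return rotl(p ^ k2, k1) + k1; }
static uintptr_t decode(uintptr_t x, uintptr_t k1, uintptr_t k2) { return rotr(x - k1, k1) ^ k2; }

int main(void) {
  const uintptr_t k1 = (uintptr_t)0x9e3779b97f4a7c15ULL;  // stands in for page->key[0]
  const uintptr_t k2 = (uintptr_t)0xd1b54a32d192ed03ULL;  // stands in for page->key[1]
  const uintptr_t p  = (uintptr_t)0x00007f3a56e01040ULL;  // some block address
  const uintptr_t e  = encode(p, k1, k2);
  printf("encoded 0x%" PRIxPTR ", round-trip ok: %d\n", e, decode(e, k1, k2) == p);
  return 0;
}

Because xor, a key-dependent rotation, and addition do not commute, guessing the pointer or xoring two observed encodings is intended to reveal far less about the keys than with a plain `p ^ k` scheme; the real code additionally substitutes a per-page `null` value for NULL so that the sentinel's encoding is not a constant.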