merge from dev

This commit is contained in:
daan 2019-10-28 12:31:08 -07:00
commit 1a36ca3eb9
18 changed files with 246 additions and 834 deletions

View File

@ -68,7 +68,7 @@ endif()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=2)
list(APPEND mi_defines MI_SECURE=3)
endif()
if(MI_SEE_ASM MATCHES "ON")

Binary file not shown.

Binary file not shown.

View File

@ -111,12 +111,12 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<WarningLevel>Level2</WarningLevel>
<Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<PreprocessorDefinitions>MI_DEBUG=1;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp17</LanguageStandard>

View File

@ -130,7 +130,7 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add
return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return (expected == RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
}
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
return mi_atomic_cas_strong(p,desired,expected);

View File

@ -20,6 +20,18 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_trace_message(...)
#endif
#if defined(_MSC_VER)
#define mi_decl_noinline __declspec(noinline)
#define mi_attr_noreturn
#elif defined(__GNUC__) || defined(__clang__)
#define mi_decl_noinline __attribute__((noinline))
#define mi_attr_noreturn __attribute__((noreturn))
#else
#define mi_decl_noinline
#define mi_attr_noreturn
#endif
// "options.c"
void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message);
void _mi_fprintf(mi_output_fun* out, const char* fmt, ...);
@ -28,12 +40,12 @@ void _mi_warning_message(const char* fmt, ...);
void _mi_verbose_message(const char* fmt, ...);
void _mi_trace_message(const char* fmt, ...);
void _mi_options_init(void);
void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn;
// "init.c"
extern mi_stats_t _mi_stats_main;
extern const mi_page_t _mi_page_empty;
bool _mi_is_main_thread(void);
uintptr_t _mi_ptr_cookie(const void* p);
uintptr_t _mi_random_shuffle(uintptr_t x);
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
bool _mi_preloading(); // true while the C runtime is not ready
@ -124,13 +136,6 @@ bool _mi_page_is_valid(mi_page_t* page);
#define __has_builtin(x) 0
#endif
#if defined(_MSC_VER)
#define mi_decl_noinline __declspec(noinline)
#elif defined(__GNUC__) || defined(__clang__)
#define mi_decl_noinline __attribute__((noinline))
#else
#define mi_decl_noinline
#endif
/* -----------------------------------------------------------
@ -156,10 +161,13 @@ bool _mi_page_is_valid(mi_page_t* page);
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
#if (MI_INTPTR_SIZE == 4)
#include <limits.h> // UINT_MAX, ULONG_MAX
#if (SIZE_MAX == UINT_MAX)
return __builtin_umul_overflow(count, size, total);
#else
#elif (SIZE_MAX == ULONG_MAX)
return __builtin_umull_overflow(count, size, total);
#else
return __builtin_umulll_overflow(count, size, total);
#endif
#else /* __builtin_umul_overflow is unavailable */
*total = count * size;
@ -235,6 +243,10 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
return (heap != &_mi_heap_empty);
}
static inline uintptr_t _mi_ptr_cookie(const void* p) {
return ((uintptr_t)p ^ _mi_heap_main.cookie);
}
/* -----------------------------------------------------------
Pages
----------------------------------------------------------- */
@ -342,19 +354,19 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
// Page flags
//-----------------------------------------------------------
static inline bool mi_page_is_in_full(const mi_page_t* page) {
return page->flags.in_full;
return page->flags.x.in_full;
}
static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
page->flags.in_full = in_full;
page->flags.x.in_full = in_full;
}
static inline bool mi_page_has_aligned(const mi_page_t* page) {
return page->flags.has_aligned;
return page->flags.x.has_aligned;
}
static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
page->flags.has_aligned = has_aligned;
page->flags.x.has_aligned = has_aligned;
}
@ -362,8 +374,12 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
// Encoding/Decoding the free list next pointers
// -------------------------------------------------------------------
static inline mi_block_t* mi_block_nextx( uintptr_t cookie, mi_block_t* block ) {
#if MI_SECURE
static inline bool mi_is_in_same_segment(const void* p, const void* q) {
return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
}
static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) {
#if MI_SECURE
return (mi_block_t*)(block->next ^ cookie);
#else
UNUSED(cookie);
@ -371,7 +387,7 @@ static inline mi_block_t* mi_block_nextx( uintptr_t cookie, mi_block_t* block )
#endif
}
static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_block_t* next) {
static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) {
#if MI_SECURE
block->next = (mi_encoded_t)next ^ cookie;
#else
@ -380,16 +396,25 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl
#endif
}
static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) {
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
#if MI_SECURE
return mi_block_nextx(page->cookie,block);
mi_block_t* next = mi_block_nextx(page->cookie,block);
#if MI_SECURE >= 4
// check if next is at least in our segment range
// TODO: it is better to check if it is actually inside our page but that is more expensive
// to calculate. Perhaps with a relative free list this becomes feasible?
if (next!=NULL && !mi_is_in_same_segment(block, next)) {
_mi_fatal_error("corrupted free list entry at %p: %zx\n", block, (uintptr_t)next);
}
#endif
return next;
#else
UNUSED(page);
return mi_block_nextx(0, block);
#endif
}
static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) {
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
#if MI_SECURE
mi_block_set_nextx(page->cookie,block,next);
#else

View File

@ -22,8 +22,11 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
// #define MI_STAT 1
// Define MI_SECURE as 1 to encode free lists
// #define MI_SECURE 1
// Define MI_SECURE to enable security mitigations
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists
// #define MI_SECURE 4 // all security enabled (checks for double free, corrupted free list and invalid pointer free)
#if !defined(MI_SECURE)
#define MI_SECURE 0
@ -131,15 +134,13 @@ typedef enum mi_delayed_e {
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`value == 0`) in the `mi_free` routine.
typedef union mi_page_flags_u {
uint16_t value;
uint8_t full_aligned;
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
bool in_full:1;
bool has_aligned:1;
bool is_zero; // `true` if the blocks in the free list are zero initialized
};
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
// Thread free list.
@ -167,15 +168,16 @@ typedef uintptr_t mi_thread_free_t;
typedef struct mi_page_s {
// "owned" by the segment
uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
bool segment_in_use:1; // `true` if the segment allocated this page
bool is_reset:1; // `true` if the page memory was reset
bool is_committed:1; // `true` if the page virtual memory is committed
bool is_zero_init:1; // `true` if the page was zero initialized
uint8_t segment_in_use:1; // `true` if the segment allocated this page
uint8_t is_reset:1; // `true` if the page memory was reset
uint8_t is_committed:1; // `true` if the page virtual memory is committed
uint8_t is_zero_init:1; // `true` if the page was zero initialized
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (16 bits)
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
bool is_zero; // `true` if the blocks in the free list are zero initialized
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#if MI_SECURE

View File

@ -61,53 +61,53 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
}
void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false);
}
void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
}
void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true);
}
void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_zalloc_aligned_at(heap, size, alignment, 0);
}
void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count, size, &total)) return NULL;
return mi_heap_zalloc_aligned_at(heap, total, alignment, offset);
}
void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_calloc_aligned_at(heap,count,size,alignment,0);
}
void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset);
}
void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment);
}
void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset);
}
void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment);
}
void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset);
}
void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment);
}
@ -126,7 +126,7 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne
if (newp != NULL) {
if (zero && newsize > size) {
const mi_page_t* page = _mi_ptr_page(newp);
if (page->flags.is_zero) {
if (page->is_zero) {
// already zero initialized
mi_assert_expensive(mi_mem_is_zero(newp,newsize));
}
@ -150,55 +150,55 @@ static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsi
return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero);
}
void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false);
}
void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false);
}
void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true);
}
void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true);
}
void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(newcount, size, &total)) return NULL;
return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset);
}
void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(newcount, size, &total)) return NULL;
return mi_heap_rezalloc_aligned(heap, p, total, alignment);
}
void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset);
}
void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment);
}
void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset);
}
void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment);
}
void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset);
}
void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment);
}

View File

@ -1,715 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#if !defined(_WIN32)
#error "this file should only be included on Windows"
#endif
#include <windows.h>
#include <psapi.h>
#include <stdlib.h> // getenv
#include <stdio.h> // _setmaxstdio
#include <string.h> // strstr
/*
To override the C runtime `malloc` on Windows we need to patch the allocation
functions at runtime initialization. Unfortunately we can never patch before the
runtime initializes itself, because as soon as we call `GetProcAddress` on the
runtime module (a DLL or EXE in Windows speak), it will first load and initialize
(by the OS calling `DllMain` on it).
This means that some things might be already allocated by the C runtime itself
(and possibly other DLL's) before we get to resolve runtime adresses. This is
no problem if everyone unwinds in order: when we unload, we unpatch and restore
the original crt `free` routines and crt malloc'd memory is freed correctly.
But things go wrong if such early CRT alloc'd memory is freed or re-allocated
_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated
by us is freed after we unpatched.
There are two tricky situations to deal with:
1. The Thread Local Storage (TLS): when the main thread stops it will call registered
callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS
before any DLL's are unloaded. Unfortunately, the C runtime registers such
TLS entries with CRT allocated memory which is freed in the callback.
2. Inside the CRT:
a. Some variables might get initialized by patched allocated
blocks but freed during CRT unloading after we unpatched
(like temporary file buffers).
b. Some blocks are allocated at CRT and freed by the CRT (like the
environment storage).
c. And some blocks are allocated by the CRT and then reallocated
while patched, and finally freed after unpatching! This
happens with the `atexit` functions for example to grow the array
of registered functions.
In principle situation 2 is hopeless: since we cannot patch before CRT initialization,
we can never be sure how to free or reallocate a pointer during CRT unloading.
However, in practice there is a good solution: when terminating, we just patch
the reallocation and free routines to no-ops -- we are winding down anyway! This leaves
just the reallocation problm of CRT alloc'd memory once we are patched. Here, a study of the
CRT reveals that there seem to be just three such situations:
1. When registering `atexit` routines (to grow the exit function table),
2. When calling `_setmaxstdio` (to grow the file handle table),
3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be
a problem as these are NULL initialized.
We fix these by providing wrappers:
1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching,
and then patch the `_crt_atexit` function to implement our own global exit list (and the
same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully
(un)patched from initialization to end. We can register in the global list by dynamically
getting the global `_crt_atexit` entry from `ucrtbase.dll`.
2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first,
calls the original routine and repatches again.
That leaves us to reliably shutdown and enter "termination mode":
1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit`
that first executes all our home brew list of exit functions, and then enters a _termination_
phase that patches realloc/free variants with no-ops. Patching later again with special no-ops for
`free` also improves efficiency during the program run since no flags need to be checked.
2. That is not quite good enough yet since after executing exit routines after us on the
global exit list (registered by the CRT),
the OS starts to unwind the TLS callbacks and we would like to run callbacks registered after loading
our DLL to be done in patched mode. So, we also allocate a TLS entry when our DLL is loaded and when its
callback is called, we re-enable the original patches again. Since TLS is destroyed in FIFO order
this runs any callbacks in later DLL's in patched mode.
3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded
and then we start a termination phase again, and patch realloc/free with no-ops for good this time.
*/
static int __cdecl mi_setmaxstdio(int newmax);
// ------------------------------------------------------
// Microsoft allocation extensions
// ------------------------------------------------------
typedef size_t mi_nothrow_t;
static void mi_free_nothrow(void* p, mi_nothrow_t tag) {
UNUSED(tag);
mi_free(p);
}
// Versions of `free`, `realloc`, `recalloc`, `expand` and `msize`
// that are used during termination and are no-ops.
static void mi_free_term(void* p) {
UNUSED(p);
}
static void mi_free_size_term(void* p, size_t size) {
UNUSED(size);
UNUSED(p);
}
static void mi_free_nothrow_term(void* p, mi_nothrow_t tag) {
UNUSED(tag);
UNUSED(p);
}
static void* mi_realloc_term(void* p, size_t newsize) {
UNUSED(p); UNUSED(newsize);
return NULL;
}
static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) {
UNUSED(p); UNUSED(newcount); UNUSED(newsize);
return NULL;
}
static void* mi__expand_term(void* p, size_t newsize) {
UNUSED(p); UNUSED(newsize);
return NULL;
}
static size_t mi__msize_term(void* p) {
UNUSED(p);
return 0;
}
static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _malloc_base(size);
}
static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _calloc_base(count, size);
}
static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _realloc_base(p, size);
}
static void mi__free_dbg(void* p, int block_type) {
UNUSED(block_type);
_free_base(p);
}
// the `recalloc`,`expand`, and `msize` don't have base versions and thus need a separate term version
static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi_recalloc(p, count, size);
}
static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__expand(p, size);
}
static size_t mi__msize_dbg(void* p, int block_type) {
UNUSED(block_type);
return mi_usable_size(p);
}
static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__recalloc_term(p, count, size);
}
static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__expand_term(p, size);
}
static size_t mi__msize_dbg_term(void* p, int block_type) {
UNUSED(block_type);
return mi__msize_term(p);
}
// ------------------------------------------------------
// implement our own global atexit handler
// ------------------------------------------------------
typedef void (cbfun_t)(void);
typedef int (atexit_fun_t)(cbfun_t* fn);
typedef uintptr_t encoded_t;
typedef struct exit_list_s {
encoded_t functions; // encoded pointer to array of encoded function pointers
size_t count;
size_t capacity;
} exit_list_t;
#define MI_EXIT_INC (64)
static exit_list_t atexit_list = { 0, 0, 0 };
static exit_list_t at_quick_exit_list = { 0, 0, 0 };
static CRITICAL_SECTION atexit_lock;
// encode/decode function pointers with a random canary for security
static encoded_t canary;
static inline void *decode(encoded_t x) {
return (void*)(x^canary);
}
static inline encoded_t encode(void* p) {
return ((uintptr_t)p ^ canary);
}
static void init_canary()
{
canary = _mi_random_init(0);
atexit_list.functions = at_quick_exit_list.functions = encode(NULL);
}
// initialize the list
static void mi_initialize_atexit(void) {
InitializeCriticalSection(&atexit_lock);
init_canary();
}
// register an exit function
static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) {
if (fn == NULL) return EINVAL;
EnterCriticalSection(&atexit_lock);
encoded_t* functions = (encoded_t*)decode(list->functions);
if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL`
encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*));
if (newf != NULL) {
list->capacity += MI_EXIT_INC;
list->functions = encode(newf);
functions = newf;
}
}
int result;
if (list->count < list->capacity && functions != NULL) {
functions[list->count] = encode(fn);
list->count++;
result = 0; // success
}
else {
result = ENOMEM;
}
LeaveCriticalSection(&atexit_lock);
return result;
}
// Register a global `atexit` function
static int mi_atexit(cbfun_t* fn) {
return mi_register_atexit(&atexit_list,fn);
}
static int mi_at_quick_exit(cbfun_t* fn) {
return mi_register_atexit(&at_quick_exit_list,fn);
}
static int mi_register_onexit(void* table, cbfun_t* fn) {
// TODO: how can we distinguish a quick_exit from atexit?
return mi_atexit(fn);
}
// Execute exit functions in a list
static void mi_execute_exit_list(exit_list_t* list) {
// copy and zero the list structure
EnterCriticalSection(&atexit_lock);
exit_list_t clist = *list;
memset(list,0,sizeof(*list));
LeaveCriticalSection(&atexit_lock);
// now execute the functions outside of the lock
encoded_t* functions = (encoded_t*)decode(clist.functions);
if (functions != NULL) {
for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down..
cbfun_t* fn = (cbfun_t*)decode(functions[i-1]);
if (fn==NULL) break; // corrupted!
fn();
}
mi_free(functions);
}
}
// ------------------------------------------------------
// Jump assembly instructions for patches
// ------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)
#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one
typedef struct mi_jump_s {
uint8_t opcodes[MI_JUMP_SIZE];
} mi_jump_t;
void mi_jump_restore(void* current, const mi_jump_t* saved) {
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
}
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
if (save != NULL) {
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
}
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current;
uint32_t ofs32 = (uint32_t)diff;
#ifdef _M_X64
uint64_t ofs64 = (uint64_t)diff;
if (ofs64 != (uint64_t)ofs32) {
// use long jump
opcodes[0] = 0xFF;
opcodes[1] = 0x25;
*((uint32_t*)&opcodes[2]) = 0;
*((uint64_t*)&opcodes[6]) = (uint64_t)target;
}
else
#endif
{
// use short jump
opcodes[0] = 0xE9;
*((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */;
}
}
#elif defined(_M_ARM64)
#define MI_JUMP_SIZE 16
typedef struct mi_jump_s {
uint8_t opcodes[MI_JUMP_SIZE];
} mi_jump_t;
void mi_jump_restore(void* current, const mi_jump_t* saved) {
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
}
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
if (save != NULL) {
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
}
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
uint64_t diff = (uint8_t*)target - (uint8_t*)current;
// 0x50 0x00 0x00 0x58 ldr x16, .+8 # load PC relative +8
// 0x00 0x02 0x3F 0xD6 blr x16 # and jump
// <address>
// <address>
static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 };
memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes));
*((uint64_t*)&opcodes[8]) = diff;
}
#else
#error "define jump instructions for this platform"
#endif
// ------------------------------------------------------
// Patches
// ------------------------------------------------------
typedef enum patch_apply_e {
PATCH_NONE,
PATCH_TARGET,
PATCH_TARGET_TERM
} patch_apply_t;
#define MAX_ENTRIES 4 // maximum number of patched entry points (like `malloc` in ucrtbase and msvcrt)
typedef struct mi_patch_s {
const char* name; // name of the function to patch
void* target; // the address of the new target (never NULL)
void* target_term; // the address of the target during termination (or NULL)
patch_apply_t applied; // what target has been applied?
void* originals[MAX_ENTRIES]; // the resolved addresses of the function (or NULLs)
mi_jump_t saves[MAX_ENTRIES]; // the saved instructions in case it was applied
} mi_patch_t;
#define MI_PATCH_NAME3(name,target,term) { name, &target, &term, PATCH_NONE, {NULL,NULL,NULL,NULL} }
#define MI_PATCH_NAME2(name,target) { name, &target, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term)
#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target)
#define MI_PATCH1(name) MI_PATCH2(name,mi_##name)
static mi_patch_t patches[] = {
// we implement our own global exit handler (as the CRT versions do a realloc internally)
//MI_PATCH2(_crt_atexit, mi_atexit),
//MI_PATCH2(_crt_at_quick_exit, mi_at_quick_exit),
MI_PATCH2(_setmaxstdio, mi_setmaxstdio),
MI_PATCH2(_register_onexit_function, mi_register_onexit),
// override higher level atexit functions so we can implement at_quick_exit correcty
MI_PATCH2(atexit, mi_atexit),
MI_PATCH2(at_quick_exit, mi_at_quick_exit),
// regular entries
MI_PATCH2(malloc, mi_malloc),
MI_PATCH2(calloc, mi_calloc),
MI_PATCH3(realloc, mi_realloc,mi_realloc_term),
MI_PATCH3(free, mi_free,mi_free_term),
// extended api
MI_PATCH2(_strdup, mi_strdup),
MI_PATCH2(_strndup, mi_strndup),
MI_PATCH3(_expand, mi__expand,mi__expand_term),
MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term),
MI_PATCH3(_msize, mi_usable_size,mi__msize_term),
// base versions
MI_PATCH2(_malloc_base, mi_malloc),
MI_PATCH2(_calloc_base, mi_calloc),
MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term),
MI_PATCH3(_free_base, mi_free,mi_free_term),
// these base versions are in the crt but without import records
MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term),
MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term),
// debug
MI_PATCH2(_malloc_dbg, mi__malloc_dbg),
MI_PATCH2(_realloc_dbg, mi__realloc_dbg),
MI_PATCH2(_calloc_dbg, mi__calloc_dbg),
MI_PATCH2(_free_dbg, mi__free_dbg),
MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term),
MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term),
MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term),
#if 0
// override new/delete variants for efficiency (?)
#ifdef _WIN64
// 64 bit new/delete
MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??3@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME3("??_V@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
#else
// 32 bit new/delete
MI_PATCH_NAME2("??2@YAPAXI@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??3@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME3("??_V@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
#endif
#endif
{ NULL, NULL, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
};
// Apply a patch
static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply)
{
if (patch->originals[0] == NULL) return true; // unresolved
if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants
if (patch->applied == apply) return false;
for (int i = 0; i < MAX_ENTRIES; i++) {
void* original = patch->originals[i];
if (original == NULL) break; // no more
DWORD protect = PAGE_READWRITE;
if (!VirtualProtect(original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false;
if (apply == PATCH_NONE) {
mi_jump_restore(original, &patch->saves[i]);
}
else {
void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term);
mi_assert_internal(target != NULL);
if (target != NULL) mi_jump_write(original, target, &patch->saves[i]);
}
VirtualProtect(original, MI_JUMP_SIZE, protect, &protect);
}
patch->applied = apply;
return true;
}
// Apply all patches
static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) {
static patch_apply_t current = PATCH_NONE;
if (previous != NULL) *previous = current;
if (current == apply) return true;
current = apply;
bool ok = true;
for (size_t i = 0; patches[i].name != NULL; i++) {
if (!mi_patch_apply(&patches[i], apply)) ok = false;
}
return ok;
}
// Export the following three functions just in case
// a user needs that level of control.
// Disable all patches
mi_decl_export void mi_patches_disable(void) {
_mi_patches_apply(PATCH_NONE, NULL);
}
// Enable all patches normally
mi_decl_export bool mi_patches_enable(void) {
return _mi_patches_apply( PATCH_TARGET, NULL );
}
// Enable all patches in termination phase where free is a no-op
mi_decl_export bool mi_patches_enable_term(void) {
return _mi_patches_apply(PATCH_TARGET_TERM, NULL);
}
// ------------------------------------------------------
// Stub for _setmaxstdio
// ------------------------------------------------------
static int __cdecl mi_setmaxstdio(int newmax) {
patch_apply_t previous;
_mi_patches_apply(PATCH_NONE, &previous); // disable patches
int result = _setmaxstdio(newmax); // call original function (that calls original CRT recalloc)
_mi_patches_apply(previous,NULL); // and re-enable patches
return result;
}
// ------------------------------------------------------
// Resolve addresses dynamically
// ------------------------------------------------------
// Try to resolve patches for a given module (DLL)
static void mi_module_resolve(const char* fname, HMODULE mod, int priority) {
// see if any patches apply
for (size_t i = 0; patches[i].name != NULL; i++) {
mi_patch_t* patch = &patches[i];
if (patch->applied == PATCH_NONE) {
// find an available entry
int i = 0;
while (i < MAX_ENTRIES && patch->originals[i] != NULL) i++;
if (i < MAX_ENTRIES) {
void* addr = GetProcAddress(mod, patch->name);
if (addr != NULL) {
// found it! set the address
patch->originals[i] = addr;
_mi_trace_message(" found %s at %s!%p (entry %i)\n", patch->name, fname, addr, i);
}
}
}
}
}
#define MIMALLOC_NAME "mimalloc-override.dll"
#define UCRTBASE_NAME "ucrtbase.dll"
#define UCRTBASED_NAME "ucrtbased.dll"
// Resolve addresses of all patches by inspecting the loaded modules
static atexit_fun_t* crt_atexit = NULL;
static atexit_fun_t* crt_at_quick_exit = NULL;
static bool mi_patches_resolve(void) {
// get all loaded modules
HANDLE process = GetCurrentProcess(); // always -1, no need to release
DWORD needed = 0;
HMODULE modules[400]; // try to stay under 4k to not trigger the guard page
EnumProcessModules(process, modules, sizeof(modules), &needed);
if (needed == 0) return false;
int count = needed / sizeof(HMODULE);
int ucrtbase_index = 0;
int mimalloc_index = 0;
// iterate through the loaded modules
for (int i = 0; i < count; i++) {
HMODULE mod = modules[i];
char filename[MAX_PATH] = { 0 };
DWORD slen = GetModuleFileName(mod, filename, MAX_PATH);
if (slen > 0 && slen < MAX_PATH) {
// filter out potential crt modules only
filename[slen] = 0;
const char* lastsep = strrchr(filename, '\\');
const char* basename = (lastsep==NULL ? filename : lastsep+1);
_mi_trace_message(" %i: dynamic module %s\n", i, filename);
// remember indices so we can check load order (in debug mode)
if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i;
if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i;
if (_stricmp(basename, UCRTBASED_NAME) == 0) ucrtbase_index = i;
// see if we potentially patch in this module
int priority = 0;
if (i == 0) priority = 2; // main module to allow static crt linking
else if (_strnicmp(basename, "ucrt", 4) == 0) priority = 3; // new ucrtbase.dll in windows 10
// NOTE: don't override msvcr -- leads to crashes in setlocale (needs more testing)
// else if (_strnicmp(basename, "msvcr", 5) == 0) priority = 1; // older runtimes
if (priority > 0) {
// probably found a crt module, try to patch it
mi_module_resolve(basename,mod,priority);
// try to find the atexit functions for the main process (in `ucrtbase.dll`)
if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit");
if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit");
}
}
}
int diff = mimalloc_index - ucrtbase_index;
if (diff > 1) {
_mi_warning_message("warning: the \"mimalloc-override\" DLL seems not to load before or right after the C runtime (\"ucrtbase\").\n"
" Try to fix this by changing the linking order.\n");
}
return true;
}
// ------------------------------------------------------
// Dll Entry
// ------------------------------------------------------
extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved);
static DWORD mi_fls_unwind_entry;
static void NTAPI mi_fls_unwind(PVOID value) {
if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us
return;
}
static void mi_patches_atexit(void) {
mi_execute_exit_list(&atexit_list);
mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
}
static void mi_patches_at_quick_exit(void) {
mi_execute_exit_list(&at_quick_exit_list);
mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
}
BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) {
if (reason == DLL_PROCESS_ATTACH) {
__security_init_cookie();
}
else if (reason == DLL_PROCESS_DETACH) {
// enter termination phase for good now
mi_patches_enable_term();
}
// C runtime main
BOOL ok = _DllMainCRTStartup(inst, reason, reserved);
if (reason == DLL_PROCESS_ATTACH && ok) {
// initialize at exit lists
mi_initialize_atexit();
// Now resolve patches
ok = mi_patches_resolve();
if (ok) {
// check if patching is not disabled
#pragma warning(suppress:4996)
const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE");
bool enabled = (s == NULL || !(strstr("1;TRUE;YES;ON", s) != NULL));
if (!enabled) {
_mi_verbose_message("override is disabled\n");
}
else {
// and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress)
mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind);
if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) {
FlsSetValue(mi_fls_unwind_entry, (void*)1);
}
// register our patch disabler in the global exit list
if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit);
if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit);
// and patch ! this also redirects the `atexit` handling for the global exit list
mi_patches_enable();
_mi_verbose_message("override is enabled\n");
// hide internal allocation
mi_stats_reset();
}
}
}
return ok;
}

View File

@ -33,7 +33,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
page->used++;
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
#if (MI_DEBUG)
if (!page->flags.is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
#elif (MI_SECURE)
block->next = 0;
#endif
@ -47,26 +47,26 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
}
// allocate a small block
extern inline void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(size <= MI_SMALL_SIZE_MAX);
mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
return _mi_page_malloc(heap, page, size);
}
extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
// zero initialized small block
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
if (p != NULL) { memset(p, 0, size); }
return p;
}
// The main allocation function
extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(heap!=NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
void* p;
@ -85,7 +85,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep
return p;
}
extern inline void* mi_malloc(size_t size) mi_attr_noexcept {
extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept {
return mi_heap_malloc(mi_get_default_heap(), size);
}
@ -96,7 +96,7 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
mi_assert_internal(p != NULL);
mi_assert_internal(size > 0 && page->block_size >= size);
mi_assert_internal(_mi_ptr_page(p)==page);
if (page->flags.is_zero) {
if (page->is_zero) {
// already zero initialized memory?
((mi_block_t*)p)->next = 0; // clear the free list pointer
mi_assert_expensive(mi_mem_is_zero(p,page->block_size));
@ -115,19 +115,63 @@ void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
return p;
}
extern inline void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
extern inline mi_decl_allocator void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return _mi_heap_malloc_zero(heap, size, true);
}
void* mi_zalloc(size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept {
return mi_heap_zalloc(mi_get_default_heap(),size);
}
// ------------------------------------------------------
// Check for double free in secure mode
// ------------------------------------------------------
#if MI_SECURE>=4
static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) {
while (list != NULL) {
if (elem==list) return true;
list = mi_block_next(page, list);
}
return false;
}
static mi_decl_noinline bool mi_check_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) {
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) {
// Suspicious: the decoded value is in the same page (or NULL).
// Walk the free lists to see if it is already freed
if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block))
{
_mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
return true;
}
}
return false;
}
static inline bool mi_check_double_free(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* n = (mi_block_t*)(block->next ^ page->cookie);
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check
(n==NULL || mi_is_in_same_segment(block, n)))
{
// Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free?
return mi_check_double_freex(page, block, n);
}
return false;
}
#endif
// ------------------------------------------------------
// Free
// ------------------------------------------------------
// multi-threaded free
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
@ -147,7 +191,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
mi_block_set_next(page, block, page->free);
page->free = block;
page->used--;
page->flags.is_zero = false;
page->is_zero = false;
_mi_segment_page_free(page,true,&heap->tld->segments);
}
return;
@ -251,14 +295,16 @@ void mi_free(void* p) mi_attr_noexcept
#if (MI_DEBUG>0)
if (mi_unlikely(!mi_is_in_heap_region(p))) {
_mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: 0x%p\n"
_mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n"
"(this may still be a valid very large allocation (over 64MiB))\n", p);
if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) {
_mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p);
}
}
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
_mi_error_message("trying to mi_free a pointer that does not point to a valid heap space: %p\n", p);
_mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p);
return;
}
#endif
@ -278,6 +324,9 @@ void mi_free(void* p) mi_attr_noexcept
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
#if MI_SECURE>=4
if (mi_check_double_free(page,block)) return;
#endif
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
@ -360,29 +409,29 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
mi_free(p);
}
extern inline void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
extern inline mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_heap_zalloc(heap,total);
}
void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_calloc(mi_get_default_heap(),count,size);
}
// Uninitialized `calloc`
extern void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL;
return mi_heap_malloc(heap, total);
}
void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_mallocn(mi_get_default_heap(),count,size);
}
// Expand in place or fail
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
if (p == NULL) return NULL;
size_t size = mi_usable_size(p);
if (newsize > size) return NULL;
@ -408,11 +457,11 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
return newp;
}
void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
return _mi_heap_realloc_zero(heap, p, newsize, false);
}
void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count, size, &total)) return NULL;
return mi_heap_realloc(heap, p, total);
@ -420,41 +469,41 @@ void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_a
// Reallocate but free `p` on errors
void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
void* newp = mi_heap_realloc(heap, p, newsize);
if (newp==NULL && p!=NULL) mi_free(p);
return newp;
}
void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
return _mi_heap_realloc_zero(heap, p, newsize, true);
}
void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
size_t total;
if (mi_mul_overflow(count, size, &total)) return NULL;
return mi_heap_rezalloc(heap, p, total);
}
void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept {
return mi_heap_realloc(mi_get_default_heap(),p,newsize);
}
void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept {
return mi_heap_reallocn(mi_get_default_heap(),p,count,size);
}
// Reallocate but free `p` on errors
void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept {
return mi_heap_reallocf(mi_get_default_heap(),p,newsize);
}
void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept {
mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept {
return mi_heap_rezalloc(mi_get_default_heap(), p, newsize);
}
void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept {
mi_decl_allocator void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept {
return mi_heap_recalloc(mi_get_default_heap(), p, count, size);
}

View File

@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0, false, false, false, false, 0, 0,
{ 0 },
{ 0 }, false,
NULL, // free
#if MI_SECURE
0,
@ -184,10 +184,6 @@ uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
return x;
}
uintptr_t _mi_ptr_cookie(const void* p) {
return ((uintptr_t)p ^ _mi_heap_main.cookie);
}
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
----------------------------------------------------------- */

View File

@ -71,7 +71,7 @@ bool _mi_os_is_huge_reserved(void* p);
typedef uintptr_t mi_region_info_t;
static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) {
return ((uintptr_t)start | ((is_large?1:0) << 1) | (is_committed?1:0));
return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0));
}
static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) {

View File

@ -144,21 +144,23 @@ static void mi_out_stderr(const char* msg) {
// function we also buffer output that happens earlier. When
// an output function is registered it is called immediately with
// the output up to that point.
#define MAX_OUT_BUF (8*1024)
static char out_buf[MAX_OUT_BUF+1];
#ifndef MI_MAX_DELAY_OUTPUT
#define MI_MAX_DELAY_OUTPUT (32*1024)
#endif
static char out_buf[MI_MAX_DELAY_OUTPUT+1];
static _Atomic(uintptr_t) out_len;
static void mi_out_buf(const char* msg) {
if (msg==NULL) return;
if (mi_atomic_read_relaxed(&out_len)>=MAX_OUT_BUF) return;
if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
size_t n = strlen(msg);
if (n==0) return;
// claim space
uintptr_t start = mi_atomic_addu(&out_len, n);
if (start >= MAX_OUT_BUF) return;
if (start >= MI_MAX_DELAY_OUTPUT) return;
// check bound
if (start+n >= MAX_OUT_BUF) {
n = MAX_OUT_BUF-start-1;
if (start+n >= MI_MAX_DELAY_OUTPUT) {
n = MI_MAX_DELAY_OUTPUT-start-1;
}
memcpy(&out_buf[start], msg, n);
}
@ -166,9 +168,9 @@ static void mi_out_buf(const char* msg) {
static void mi_out_buf_flush(mi_output_fun* out) {
if (out==NULL) return;
// claim all (no more output will be added after this point)
size_t count = mi_atomic_addu(&out_len, MAX_OUT_BUF);
size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT);
// and output the current contents
if (count>MAX_OUT_BUF) count = MAX_OUT_BUF;
if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT;
out_buf[count] = 0;
out(out_buf);
}
@ -283,6 +285,14 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co
}
#endif
mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) {
va_list args;
va_start(args, fmt);
mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args);
va_end(args);
exit(99);
}
// --------------------------------------------------------
// Initialize options by checking the environment
// --------------------------------------------------------
@ -344,7 +354,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
size_t len = strlen(s);
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
for (size_t i = 0; i < len; i++) {
buf[i] = toupper(s[i]);
buf[i] = (char)toupper(s[i]);
}
buf[len] = 0;
if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) {

View File

@ -145,13 +145,13 @@ void _mi_os_init(void) {
hDll = LoadLibrary(TEXT("kernelbase.dll"));
if (hDll != NULL) {
// use VirtualAlloc2FromApp if possible as it is available to Windows store apps
pVirtualAlloc2 = (PVirtualAlloc2)GetProcAddress(hDll, "VirtualAlloc2FromApp");
if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)GetProcAddress(hDll, "VirtualAlloc2");
pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp");
if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
FreeLibrary(hDll);
}
hDll = LoadLibrary(TEXT("ntdll.dll"));
if (hDll != NULL) {
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
if (hDll != NULL) {
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
FreeLibrary(hDll);
}
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
@ -283,7 +283,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
p = mi_win_virtual_allocx(addr, size, try_alignment, flags);
}
if (p == NULL) {
_mi_warning_message("unable to alloc mem error: err: %i size: 0x%x \n", GetLastError(), size);
_mi_warning_message("unable to allocate memory: error code: %i, addr: %p, size: 0x%x, large only: %d, allow_large: %d\n", GetLastError(), addr, size, large_only, allow_large);
}
return p;
}
@ -456,6 +456,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
if (!commit) allow_large = false;
void* p = NULL;
/*
if (commit && allow_large) {
p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment);
if (p != NULL) {
@ -463,6 +464,7 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
return p;
}
}
*/
#if defined(_WIN32)
int flags = MEM_RESERVE;
@ -698,7 +700,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE);
mi_assert_internal(p == start);
#if 1
if (p == start) {
if (p == start && start != NULL) {
VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set
}
#endif
@ -884,7 +886,7 @@ int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reser
uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages);
start = start + ((uintptr_t)MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
#endif
// Allocate one page at the time but try to place them contiguously

View File

@ -57,7 +57,7 @@ static inline uint8_t mi_bsr32(uint32_t x);
static inline uint8_t mi_bsr32(uint32_t x) {
uint32_t idx;
_BitScanReverse((DWORD*)&idx, x);
return idx;
return (uint8_t)idx;
}
#elif defined(__GNUC__) || defined(__clang__)
static inline uint8_t mi_bsr32(uint32_t x) {

View File

@ -192,7 +192,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
// usual case
page->free = page->local_free;
page->local_free = NULL;
page->flags.is_zero = false;
page->is_zero = false;
}
else if (force) {
// append -- only on shutdown (force) as this is a linear operation
@ -204,7 +204,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_block_set_next(page, tail, page->free);
page->free = page->local_free;
page->local_free = NULL;
page->flags.is_zero = false;
page->is_zero = false;
}
}
@ -559,7 +559,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
// extension into zero initialized memory preserves the zero'd free list
if (!page->is_zero_init) {
page->flags.is_zero = false;
page->is_zero = false;
}
mi_assert_expensive(mi_page_is_valid_init(page));
}
@ -579,7 +579,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
#if MI_SECURE
page->cookie = _mi_heap_random(heap) | 1;
#endif
page->flags.is_zero = page->is_zero_init;
page->is_zero = page->is_zero_init;
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);

View File

@ -2,12 +2,18 @@
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdint.h>
#include <mimalloc.h>
#include <mimalloc-override.h> // redefines malloc etc.
static void double_free1();
static void double_free2();
int main() {
mi_version();
//double_free1();
//double_free2();
void* p1 = malloc(78);
void* p2 = malloc(24);
free(p1);
@ -29,3 +35,37 @@ int main() {
mi_stats_print(NULL);
return 0;
}
static void double_free1() {
void* p[256];
uintptr_t buf[256];
p[0] = mi_malloc(622616);
p[1] = mi_malloc(655362);
p[2] = mi_malloc(786432);
mi_free(p[2]);
// [VULN] Double free
mi_free(p[2]);
p[3] = mi_malloc(786456);
// [BUG] Found overlap
// p[3]=0x429b2ea2000 (size=917504), p[1]=0x429b2e42000 (size=786432)
fprintf(stderr, "p3: %p-%p, p1: %p-%p, p2: %p\n", p[3], (uint8_t*)(p[3]) + 786456, p[1], (uint8_t*)(p[1]) + 655362, p[2]);
}
static void double_free2() {
void* p[256];
uintptr_t buf[256];
// [INFO] Command buffer: 0x327b2000
// [INFO] Input size: 182
p[0] = malloc(712352);
p[1] = malloc(786432);
free(p[0]);
// [VULN] Double free
free(p[0]);
p[2] = malloc(786440);
p[3] = malloc(917504);
p[4] = malloc(786440);
// [BUG] Found overlap
// p[4]=0x433f1402000 (size=917504), p[1]=0x433f14c2000 (size=786432)
fprintf(stderr, "p1: %p-%p, p2: %p-%p\n", p[4], (uint8_t*)(p[4]) + 917504, p[1], (uint8_t*)(p[1]) + 786432);
}

View File

@ -2,6 +2,7 @@
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdint.h>
#include <mimalloc.h>
#include <new>
@ -41,7 +42,7 @@ int main() {
p2 = malloc(16);
p1 = realloc(p1, 32);
free(p1);
mi_free(p2);
free(p2);
mi_free(s);
Test* t = new Test(42);
delete t;
@ -66,3 +67,5 @@ public:
};
static Static s = Static();