Merge branch 'master' into dev-win

This commit is contained in:
daan 2019-11-22 09:29:28 -08:00
commit 74c409192b
25 changed files with 495 additions and 202 deletions

View File

@ -6,14 +6,13 @@ set(CMAKE_CXX_STANDARD 17)
option(MI_OVERRIDE "Override the standard malloc interface" ON)
option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF)
option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF)
option(MI_SECURE "Use full security mitigations (like guard pages, allocation randomization, double-free mitigation, and free-list corruption detection)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF)
option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_TESTS "Build test executables" ON)
set(mi_install_dir "lib/mimalloc-${mi_version}")
option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
set(mi_sources
src/stats.c
@ -28,29 +27,33 @@ set(mi_sources
src/options.c
src/init.c)
# Set default build type
# -----------------------------------------------------------------------------
# Converience: set default build type depending on the build directory
# -----------------------------------------------------------------------------
if (NOT CMAKE_BUILD_TYPE)
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$")
message(STATUS "No build type selected, default to *** Debug ***")
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL MATCHES "ON")
message(STATUS "No build type selected, default to: Debug")
set(CMAKE_BUILD_TYPE "Debug")
else()
message(STATUS "No build type selected, default to *** Release ***")
message(STATUS "No build type selected, default to: Release")
set(CMAKE_BUILD_TYPE "Release")
endif()
else()
message(STATUS "Build type specified as *** ${CMAKE_BUILD_TYPE} ***")
endif()
if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$")
message(STATUS "Default to secure build")
set(MI_SECURE "ON")
endif()
# -----------------------------------------------------------------------------
# Process options
# -----------------------------------------------------------------------------
if(CMAKE_C_COMPILER_ID MATCHES "MSVC")
set(MI_USE_CXX "ON")
endif()
# Options
if(MI_OVERRIDE MATCHES "ON")
message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
if(APPLE)
@ -67,8 +70,8 @@ if(MI_OVERRIDE MATCHES "ON")
endif()
if(MI_SECURE MATCHES "ON")
message(STATUS "Set secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=3)
message(STATUS "Set full secure build (MI_SECURE=ON)")
list(APPEND mi_defines MI_SECURE=4)
endif()
if(MI_SEE_ASM MATCHES "ON")
@ -77,7 +80,12 @@ if(MI_SEE_ASM MATCHES "ON")
endif()
if(MI_CHECK_FULL MATCHES "ON")
message(STATUS "Set debug level to full invariant checking (MI_CHECK_FULL=ON)")
message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead")
set(MI_DEBUG_FULL "ON")
endif()
if(MI_DEBUG_FULL MATCHES "ON")
message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)")
list(APPEND mi_defines MI_DEBUG=3) # full invariant checking
endif()
@ -102,19 +110,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
endif()
endif()
if(NOT(CMAKE_BUILD_TYPE MATCHES "Release|release|RelWithDebInfo|relwithdebinfo"))
string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type)
set(mi_basename "mimalloc-${build_type}")
else()
if(MI_SECURE MATCHES "ON")
set(mi_basename "mimalloc-secure")
else()
set(mi_basename "mimalloc")
endif()
endif()
message(STATUS "Output library name : ${mi_basename}")
message(STATUS "Installation directory: ${mi_install_dir}")
# extra needed libraries
if(WIN32)
list(APPEND mi_libraries psapi shell32 user32)
@ -127,9 +122,28 @@ else()
endif()
# -----------------------------------------------------------------------------
# Main targets
# Install and output names
# -----------------------------------------------------------------------------
set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}")
if(MI_SECURE MATCHES "ON")
set(mi_basename "mimalloc-secure")
else()
set(mi_basename "mimalloc")
endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC)
if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$"))
set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version
endif()
message(STATUS "")
message(STATUS "Library base name: ${mi_basename}")
message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}")
message(STATUS "Install directory: ${mi_install_dir}")
message(STATUS "")
# -----------------------------------------------------------------------------
# Main targets
# -----------------------------------------------------------------------------
# shared library
add_library(mimalloc SHARED ${mi_sources})
@ -231,7 +245,7 @@ endif()
if (MI_OVERRIDE MATCHES "ON")
target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE)
if(NOT WIN32)
# It is only possible to override malloc on Windows when building as a DLL. (src/alloc-override.c)
# It is only possible to override malloc on Windows when building as a DLL.
target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE)
target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE)
endif()

View File

@ -35,22 +35,32 @@ jobs:
CC: gcc
CXX: g++
BuildType: debug
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release:
CC: gcc
CXX: g++
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Secure:
CC: gcc
CXX: g++
BuildType: secure
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
Debug Clang:
CC: clang
CXX: clang++
BuildType: debug-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_CHECK_FULL=ON
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release Clang:
CC: clang
CXX: clang++
BuildType: release-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Secure Clang:
CC: clang
CXX: clang++
BuildType: secure-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
steps:
- task: CMake@1

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,5 +1,5 @@
set(mi_version_major 1)
set(mi_version_minor 1)
set(mi_version_minor 2)
set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version})

View File

@ -0,0 +1,72 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\src\options.c">
<Filter>Header Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-aligned.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\heap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\memory.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-queue.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-atomic.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-override.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-types.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Header Files">
<UniqueIdentifier>{f1fccf27-17b9-42dd-ba51-6070baff85c6}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files">
<UniqueIdentifier>{39cb7e38-69d0-43fb-8406-6a0f7cefc3b4}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

View File

@ -149,8 +149,8 @@
</ClCompile>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="mimalloc-override.vcxproj">
<Project>{abb5eae7-b3e6-432e-b636-333449892ea7}</Project>
<ProjectReference Include="mimalloc.vcxproj">
<Project>{abb5eae7-b3e6-432e-b636-333449892ea6}</Project>
</ProjectReference>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@ -0,0 +1,75 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\..\src\alloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-aligned.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override-osx.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\heap.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\init.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\memory.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\options.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\os.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\page-queue.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-atomic.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-internal.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-override.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc-types.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="Header Files">
<UniqueIdentifier>{2b556b10-f559-4b2d-896e-142652adbf0c}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files">
<UniqueIdentifier>{852a14ae-6dde-4e95-8077-ca705e97e5af}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

View File

@ -220,7 +220,7 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x)
#endif
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield() {
static inline void mi_atomic_yield(void) {
sched_yield();
}
#else

View File

@ -101,6 +101,7 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i
void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
uintptr_t _mi_heap_random(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
@ -274,14 +275,20 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
return segment;
}
// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
// used internally
static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) {
// if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE);
uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
mi_assert_internal(idx < segment->capacity);
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
return idx;
}
// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
uintptr_t idx = _mi_segment_page_idx_of(segment, p);
return &((mi_segment_t*)segment)->pages[idx];
}
@ -372,53 +379,67 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
// -------------------------------------------------------------------
// Encoding/Decoding the free list next pointers
// Note: we pass a `null` value to be used as the `NULL` value for the
// end of a free list. This is to prevent the cookie itself to ever
// be present among user blocks (as `cookie^0==cookie`).
// -------------------------------------------------------------------
static inline bool mi_is_in_same_segment(const void* p, const void* q) {
return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
}
static inline mi_block_t* mi_block_nextx( uintptr_t cookie, const mi_block_t* block ) {
static inline bool mi_is_in_same_page(const void* p, const void* q) {
mi_segment_t* segmentp = _mi_ptr_segment(p);
mi_segment_t* segmentq = _mi_ptr_segment(q);
if (segmentp != segmentq) return false;
uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p);
uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q);
return (idxp == idxq);
}
static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) {
#ifdef MI_ENCODE_FREELIST
return (mi_block_t*)(block->next ^ cookie);
mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
if (mi_unlikely((void*)b==null)) { b = NULL; }
return b;
#else
UNUSED(cookie);
UNUSED(cookie); UNUSED(null);
return (mi_block_t*)block->next;
#endif
}
static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, const mi_block_t* next) {
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
#ifdef MI_ENCODE_FREELIST
if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
block->next = (mi_encoded_t)next ^ cookie;
#else
UNUSED(cookie);
UNUSED(cookie); UNUSED(null);
block->next = (mi_encoded_t)next;
#endif
}
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
#ifdef MI_ENCODE_FREELIST
mi_block_t* next = mi_block_nextx(page->cookie,block);
mi_block_t* next = mi_block_nextx(page,block,page->cookie);
// check for free list corruption: is `next` at least in our segment range?
// TODO: it is better to check if it is actually inside our page but that is more expensive
// to calculate. Perhaps with a relative free list this becomes feasible?
if (next!=NULL && !mi_is_in_same_segment(block, next)) {
// TODO: check if `next` is `page->block_size` aligned?
if (next!=NULL && !mi_is_in_same_page(block, next)) {
_mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
next = NULL;
}
return next;
#else
UNUSED(page);
return mi_block_nextx(0, block);
return mi_block_nextx(page,block,0);
#endif
}
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
#ifdef MI_ENCODE_FREELIST
mi_block_set_nextx(page->cookie,block,next);
mi_block_set_nextx(page,block,next, page->cookie);
#else
UNUSED(page);
mi_block_set_nextx(0, block, next);
mi_block_set_nextx(page,block, next,0);
#endif
}

View File

@ -26,7 +26,7 @@ terms of the MIT license. A copy of the license can be found in the file
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // experimental, may be more expensive: checks for double free.
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
#if !defined(MI_SECURE)
#define MI_SECURE 0
@ -35,7 +35,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_DEBUG for debug mode
// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
// #define MI_DEBUG 2 // + internal assertion checks
// #define MI_DEBUG 3 // + extensive internal invariant checking
// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
#if !defined(MI_DEBUG)
#if !defined(NDEBUG) || defined(_DEBUG)
#define MI_DEBUG 2

View File

@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 110 // major + 2 digits minor
#define MI_MALLOC_VERSION 120 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -271,6 +271,7 @@ typedef enum mi_option_e {
mi_option_eager_commit_delay,
mi_option_segment_reset,
mi_option_os_tag,
mi_option_max_errors,
_mi_option_last
} mi_option_t;

View File

@ -1,7 +1,7 @@
<img align="left" width="100" height="100" src="doc/mimalloc-logo.png"/>
[<img align="right" src="https://dev.azure.com/Daan0324/mimalloc/_apis/build/status/microsoft.mimalloc?branchName=master"/>](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)
[<img align="right" src="https://dev.azure.com/Daan0324/mimalloc/_apis/build/status/microsoft.mimalloc?branchName=dev"/>](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)
# mimalloc
@ -37,7 +37,7 @@ Notable aspects of the design include:
programs.
- __secure__: _mimalloc_ can be built in secure mode, adding guard pages,
randomized allocation, encrypted free lists, etc. to protect against various
heap vulnerabilities. The performance penalty is only around 3% on average
heap vulnerabilities. The performance penalty is usually around 10% on average
over our benchmarks.
- __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
A heap can be destroyed at once instead of deallocating each object separately.
@ -56,6 +56,7 @@ Enjoy!
### Releases
* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
* 2019-10-07, `v1.1.0`: stable release 1.1.
* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
@ -64,7 +65,7 @@ Enjoy!
## Windows
Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build.
Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`).
The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override` project builds a DLL for overriding malloc
in the entire program.
@ -97,7 +98,7 @@ maintains detailed statistics as:
This will name the shared library as `libmimalloc-debug.so`.
Finally, you can build a _secure_ version that uses guard pages, encrypted
free lists, etc, as:
free lists, etc., as:
```
> mkdir -p out/secure
> cd out/secure
@ -138,6 +139,9 @@ target_link_libraries(myapp PUBLIC mimalloc-static)
```
to link with the static library. See `test\CMakeLists.txt` for an example.
For best performance in C++ programs, it is also recommended to override the
global `new` and `delete` operators. For convience, mimalloc provides
[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`)
and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version):
@ -188,18 +192,18 @@ or via environment variables.
- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
- `MIMALLOC_VERBOSE=1`: show verbose messages.
- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly
- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly
improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
can have fragmented memory.
- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
show in the working set even though usually just a small part is committed to physical memory. This is why it
turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better
show in the working set even though usually just a small part is committed to physical memory. This is why it
turned off by default on Windows as it looks not good in the task manager. However, in reality it is always better
to turn it on as it improves performance and has no other drawbacks.
- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at
startup and can give quite a performance improvement on long running workloads. Usually it is better to not use
`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
startup and can give quite a performance improvement on long running workloads. Usually it is better to not use
`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
contiguous physical memory can take a long time when memory is fragmented. Still experimental.
[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
@ -211,7 +215,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically
## Dynamic override
This is the recommended way to override the standard malloc interface.
This is the recommended way to override the standard malloc interface.
### Linux, BSD
@ -244,29 +248,31 @@ resolved to the _mimalloc_ library.
Note that certain security restrictions may apply when doing this from
the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this
Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this
(see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)).
### Windows
On Windows you need to link your program explicitly with the mimalloc
DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL).
The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc.
DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
mimalloc (in `mimalloc-override.dll`).
To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
call to the mimalloc API in the `main` function, like `mi_version()`
call to the mimalloc API in the `main` function, like `mi_version()`
(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
for an example on how to use this.
for an example on how to use this. For best performance on Windows with C++, it
is highly recommended to also override the `new`/`delete` operations (as described
in the introduction).
The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected.
overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
(Note: in principle, it should be possible to patch existing executables
that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into
the import table (and putting `mimalloc-redirect.dll` in the same folder)
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
(Note: in principle, it is possible to patch existing executables
that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll` into the import table (and putting `mimalloc-redirect.dll` in the same folder)
Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
## Static override
@ -282,6 +288,12 @@ object file. For example:
> gcc -o myprogram mimalloc-override.o myfile1.c ...
```
Another way to override statically that works on all platforms, is to
link statically to mimalloc (as shown in the introduction) and include a
header file in each source file that re-defines `malloc` etc. to `mi_malloc`.
This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably though if all sources are
under your control or otherwise mixing of pointers from different heaps may occur!
# Performance

View File

@ -157,7 +157,7 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con
}
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* n = mi_block_nextx(page->cookie, block); // pretend it is freed, and get the decoded first field
mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL?
{
@ -235,14 +235,14 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
}
else {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* heap = page->heap;
mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap->cookie,block,dfree);
mi_block_set_nextx(heap,block,dfree, heap->cookie);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}

View File

@ -223,7 +223,7 @@ static void mi_heap_free(mi_heap_t* heap) {
// reset default
if (mi_heap_is_default(heap)) {
_mi_heap_default = heap->tld->heap_backing;
_mi_heap_set_default_direct(heap->tld->heap_backing);
}
// and free the used memory
mi_free(heap);
@ -354,8 +354,8 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
mi_assert(mi_heap_is_initialized(heap));
if (!mi_heap_is_initialized(heap)) return NULL;
mi_assert_expensive(mi_heap_is_valid(heap));
mi_heap_t* old = _mi_heap_default;
_mi_heap_default = heap;
mi_heap_t* old = mi_get_default_heap();
_mi_heap_set_default_direct(heap);
return old;
}

View File

@ -90,6 +90,7 @@ const mi_heap_t _mi_heap_empty = {
false
};
// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
@ -198,8 +199,8 @@ static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(_mi_heap_default)) return true;
if (_mi_is_main_thread()) {
// the main heap is statically allocated
_mi_heap_default = &_mi_heap_main;
mi_assert_internal(_mi_heap_default->tld->heap_backing == _mi_heap_default);
_mi_heap_set_default_direct(&_mi_heap_main);
mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
@ -219,18 +220,17 @@ static bool _mi_heap_init(void) {
tld->heap_backing = heap;
tld->segments.stats = &tld->stats;
tld->os.stats = &tld->stats;
_mi_heap_default = heap;
_mi_heap_set_default_direct(heap);
}
return false;
}
// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_heap_done(void) {
mi_heap_t* heap = _mi_heap_default;
static bool _mi_heap_done(mi_heap_t* heap) {
if (!mi_heap_is_initialized(heap)) return true;
// reset default heap
_mi_heap_default = (_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
_mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
// todo: delete all non-backing heaps?
@ -277,6 +277,8 @@ static bool _mi_heap_done(void) {
// to set up the thread local keys.
// --------------------------------------------------------
static void _mi_thread_done(mi_heap_t* default_heap);
#ifdef __wasi__
// no pthreads in the WebAssembly Standard Interface
#elif !defined(_WIN32)
@ -291,14 +293,14 @@ static bool _mi_heap_done(void) {
#include <fibersapi.h>
static DWORD mi_fls_key;
static void NTAPI mi_fls_done(PVOID value) {
if (value!=NULL) mi_thread_done();
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
}
#elif defined(MI_USE_PTHREADS)
// use pthread locol storage keys to detect thread ending
#include <pthread.h>
static pthread_key_t mi_pthread_key;
static void mi_pthread_done(void* value) {
if (value!=NULL) mi_thread_done();
if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
}
#elif defined(__wasi__)
// no pthreads in the WebAssembly Standard Interface
@ -332,6 +334,8 @@ void mi_thread_init(void) mi_attr_noexcept
mi_process_init();
// initialize the thread local default heap
// (this will call `_mi_heap_set_default_direct` and thus set the
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
if (_mi_heap_init()) return; // returns true if already initialized
// don't further initialize for the main thread
@ -339,33 +343,38 @@ void mi_thread_init(void) mi_attr_noexcept
_mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1);
// set hooks so our mi_thread_done() will be called
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsSetValue(mi_fls_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_fls_done` is called
#elif defined(MI_USE_PTHREADS)
pthread_setspecific(mi_pthread_key, (void*)(_mi_thread_id()|1)); // set to a dummy value so that `mi_pthread_done` is called
#endif
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
void mi_thread_done(void) mi_attr_noexcept {
_mi_thread_done(mi_get_default_heap());
}
static void _mi_thread_done(mi_heap_t* heap) {
// stats
mi_heap_t* heap = mi_get_default_heap();
if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) {
_mi_stat_decrease(&heap->tld->stats.threads, 1);
}
// abandon the thread local heap
if (_mi_heap_done()) return; // returns true if already ran
//if (!_mi_is_main_thread()) {
// _mi_verbose_message("thread done: 0x%zx\n", _mi_thread_id());
//}
if (_mi_heap_done(heap)) return; // returns true if already ran
}
void _mi_heap_set_default_direct(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
_mi_heap_default = heap;
// ensure the default heap is passed to `_mi_thread_done`
// setting to a non-NULL value also ensures `mi_thread_done` is called.
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsSetValue(mi_fls_key, heap);
#elif defined(MI_USE_PTHREADS)
pthread_setspecific(mi_pthread_key, heap);
#endif
}
// --------------------------------------------------------
// Run functions on process init/done, and thread init/done
@ -446,7 +455,7 @@ void mi_process_init(void) mi_attr_noexcept {
// access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose.
mi_heap_t* h = _mi_heap_default;
mi_heap_t* h = mi_get_default_heap();
_mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id();
@ -461,6 +470,7 @@ void mi_process_init(void) mi_attr_noexcept {
#if (MI_DEBUG)
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
_mi_verbose_message("secure level: %d\n", MI_SECURE);
mi_thread_init();
mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL)
}

View File

@ -14,6 +14,10 @@ terms of the MIT license. A copy of the license can be found in the file
#include <ctype.h> // toupper
#include <stdarg.h>
static uintptr_t mi_max_error_count = 16; // stop outputting errors after this
static void mi_add_stderr_output();
int mi_version(void) mi_attr_noexcept {
return MI_MALLOC_VERSION;
}
@ -66,13 +70,16 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
{ 100, UNINIT, MI_OPTION(os_tag) } // only apple specific for now but might serve more or less related purpose
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output
};
static void mi_option_init(mi_option_desc_t* desc);
void _mi_options_init(void) {
// called on process load
// called on process load; should not be called before the CRT is initialized!
// (e.g. do not call this from process_init as that may run before CRT initialization)
mi_add_stderr_output(); // now it safe to use stderr for output
for(int i = 0; i < _mi_option_last; i++ ) {
mi_option_t option = (mi_option_t)i;
mi_option_get(option); // initialize
@ -81,6 +88,7 @@ void _mi_options_init(void) {
_mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
}
}
mi_max_error_count = mi_option_get(mi_option_max_errors);
}
long mi_option_get(mi_option_t option) {
@ -134,7 +142,7 @@ static void mi_out_stderr(const char* msg) {
#ifdef _WIN32
// on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output.
_cputs(msg);
if (!_mi_preloading()) { _cputs(msg); }
#else
fputs(msg, stderr);
#endif
@ -165,23 +173,29 @@ static void mi_out_buf(const char* msg) {
memcpy(&out_buf[start], msg, n);
}
static void mi_out_buf_flush(mi_output_fun* out) {
static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) {
if (out==NULL) return;
// claim all (no more output will be added after this point)
size_t count = mi_atomic_addu(&out_len, MI_MAX_DELAY_OUTPUT);
// claim (if `no_more_buf == true`, no more output will be added after this point)
size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1));
// and output the current contents
if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT;
out_buf[count] = 0;
out(out_buf);
if (!no_more_buf) {
out_buf[count] = '\n'; // if continue with the buffer, insert a newline
}
}
// The initial default output, outputs to stderr and the delayed output buffer.
// Once this module is loaded, switch to this routine
// which outputs to stderr and the delayed output buffer.
static void mi_out_buf_stderr(const char* msg) {
mi_out_stderr(msg);
mi_out_buf(msg);
}
// --------------------------------------------------------
// Default output handler
// --------------------------------------------------------
@ -193,14 +207,19 @@ static mi_output_fun* volatile mi_out_default; // = NULL
static mi_output_fun* mi_out_get_default(void) {
mi_output_fun* out = mi_out_default;
return (out == NULL ? &mi_out_buf_stderr : out);
return (out == NULL ? &mi_out_buf : out);
}
void mi_register_output(mi_output_fun* out) mi_attr_noexcept {
mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer
if (out!=NULL) mi_out_buf_flush(out); // output the delayed output now
if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now
}
// add stderr to the delayed output after the module is loaded
static void mi_add_stderr_output() {
mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr
mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output
}
// --------------------------------------------------------
// Messages, all end up calling `_mi_fputs`.
@ -213,7 +232,7 @@ static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT
static mi_decl_thread bool recurse = false;
void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) {
if (_mi_preloading() || recurse) return;
if (recurse) return;
if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default();
recurse = true;
if (prefix != NULL) out(prefix);
@ -227,7 +246,7 @@ void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) {
static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
if (fmt==NULL) return;
if (_mi_preloading() || recurse) return;
if (recurse) return;
recurse = true;
vsnprintf(buf,sizeof(buf)-1,fmt,args);
recurse = false;
@ -260,7 +279,7 @@ void _mi_verbose_message(const char* fmt, ...) {
void _mi_error_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return;
if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
va_list args;
va_start(args,fmt);
mi_vfprintf(NULL, "mimalloc: error: ", fmt, args);
@ -270,7 +289,7 @@ void _mi_error_message(const char* fmt, ...) {
void _mi_warning_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
if (mi_atomic_increment(&error_count) > MAX_ERROR_COUNT) return;
if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
va_list args;
va_start(args,fmt);
mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args);

View File

@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
page->heap->page_count--;
page->next = NULL;
page->prev = NULL;
page->heap = NULL;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
mi_page_set_in_full(page,false);
}
@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
page->heap = heap;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap);
page->next = queue->first;
page->prev = NULL;
if (queue->first != NULL) {
@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// set append pages to new heap and count
size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
page->heap = heap;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap);
count++;
}

View File

@ -283,7 +283,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap->cookie,block);
mi_block_t* next = mi_block_nextx(heap,block, heap->cookie);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
@ -291,7 +291,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
mi_block_t* dfree;
do {
dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap->cookie, block, dfree);
mi_block_set_nextx(heap, block, dfree, heap->cookie);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
}
@ -343,18 +343,24 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(page->heap != NULL);
_mi_page_use_delayed_free(page,MI_NEVER_DELAYED_FREE);
#if MI_DEBUG > 1
mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
#endif
// remove from our page list
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
mi_page_queue_remove(pq, page);
// page is no longer associated with our heap
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
#if MI_DEBUG>1
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) {
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
// and then remove from our page list
mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
mi_page_queue_remove(pq, page);
// and abandon it
mi_assert_internal(page->heap == NULL);
_mi_segment_page_abandon(page,segments_tld);
@ -439,13 +445,15 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)
static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) {
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) {
UNUSED(stats);
#if (MI_SECURE<=2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
size_t bsize = page->block_size;
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
const size_t bsize = page->block_size;
// initialize a randomized free list
// set up `slice_count` slices to alternate between
@ -453,8 +461,8 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
while ((extend >> shift) == 0) {
shift--;
}
size_t slice_count = (size_t)1U << shift;
size_t slice_extend = extend / slice_count;
const size_t slice_count = (size_t)1U << shift;
const size_t slice_extend = extend / slice_count;
mi_assert_internal(slice_extend >= 1);
mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
size_t counts[MI_MAX_SLICES]; // available objects in the slice
@ -468,12 +476,12 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
counts[current]--;
page->free = blocks[current];
mi_block_t* const free_start = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
size_t round = i%MI_INTPTR_SIZE;
const size_t round = i%MI_INTPTR_SIZE;
if (round == 0) rnd = _mi_random_shuffle(rnd);
// select a random next slice index
size_t next = ((rnd >> 8*round) & (slice_count-1));
@ -483,34 +491,39 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si
}
// and link the current block to it
counts[next]--;
mi_block_t* block = blocks[current];
mi_block_t* const block = blocks[current];
blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
current = next;
}
mi_block_set_next(page, blocks[current], NULL); // end of the list
// prepend to the free list (usually NULL)
mi_block_set_next(page, blocks[current], page->free); // end of the list
page->free = free_start;
heap->random = _mi_random_shuffle(rnd);
}
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats)
{
UNUSED(stats);
#if (MI_SECURE <= 2)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
mi_assert_internal(page->capacity + extend <= page->reserved);
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;
mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
const size_t bsize = page->block_size;
mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity);
// initialize a sequential free list
mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
while(block <= last) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
mi_block_set_next(page, last, NULL);
// prepend to free list (usually `NULL`)
mi_block_set_next(page, last, page->free);
page->free = start;
}
@ -532,10 +545,12 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* page, size_t e
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) {
UNUSED(stats);
mi_assert_expensive(mi_page_is_valid_init(page));
#if (MI_SECURE<=2)
mi_assert(page->free == NULL);
mi_assert(page->local_free == NULL);
mi_assert_expensive(mi_page_is_valid_init(page));
if (page->free != NULL) return;
#endif
if (page->capacity >= page->reserved) return;
size_t page_size;
@ -746,7 +761,8 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
page->heap = NULL;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
_mi_stat_increase(&heap->tld->stats.giant, block_size);
_mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);

View File

@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE)
endif()
# Import mimalloc (if installed)
find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}")
# overriding with a dynamic library

View File

@ -15,9 +15,9 @@ int main() {
mi_version();
// detect double frees and heap corruption
//double_free1();
//double_free2();
//corrupt_free();
// double_free1();
// double_free2();
// corrupt_free();
void* p1 = malloc(78);
void* p2 = malloc(24);

View File

@ -1,7 +1,7 @@
Testing allocators is difficult as bugs may only surface after particular
allocation patterns. The main approach to testing _mimalloc_ is therefore
to have extensive internal invariant checking (see `page_is_valid` in `page.c`
for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`.
for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`.
The main testing strategy is then to run [`mimalloc-bench`][bench] using full
invariant checking to catch any potential problems over a wide range of intensive
allocation benchmarks and programs.

View File

@ -6,7 +6,8 @@ terms of the MIT license.
/* This is a stress test for the allocator, using multiple threads and
transferring objects between threads. This is not a typical workload
but uses a random linear size distribution. Do not use this test as a benchmark!
but uses a random linear size distribution. Timing can also depend on
(random) thread scheduling. Do not use this test as a benchmark!
*/
#include <stdio.h>
@ -16,17 +17,35 @@ terms of the MIT license.
#include <string.h>
#include <mimalloc.h>
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
//
// argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int N = 20; // scaling factor
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
static int ITER = 10; // N full iterations re-creating all threads
// static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int N = 100; // scaling factor
// static int SCALE = 100; // scaling factor
static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of N uintptr_t?
#ifdef USE_STD_MALLOC
#define custom_malloc(s) malloc(s)
#define custom_realloc(p,s) realloc(p,s)
#define custom_free(p) free(p)
#else
#define custom_malloc(s) mi_malloc(s)
#define custom_realloc(p,s) mi_realloc(p,s)
#define custom_free(p) mi_free(p)
#endif
// transfer pointer between threads
#define TRANSFERS (1000)
static volatile void* transfer[TRANSFERS];
#if (UINTPTR_MAX != UINT32_MAX)
const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
#else
@ -39,21 +58,21 @@ typedef uintptr_t* random_t;
static uintptr_t pick(random_t r) {
uintptr_t x = *r;
#if (UINTPTR_MAX > UINT32_MAX)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
#if (UINTPTR_MAX > UINT32_MAX)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#else
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
#else
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
#endif
*r = x;
return x;
}
@ -63,10 +82,17 @@ static bool chance(size_t perc, random_t r) {
}
static void* alloc_items(size_t items, random_t r) {
if (chance(1, r)) items *= 100; // 1% huge objects;
if (items==40) items++; // pthreads uses that size for stack increases
uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t));
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
if (chance(1, r)) {
if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant
else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge
else items *= 100; // 1% large objects;
}
if (items == 40) items++; // pthreads uses that size for stack increases
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t));
if (p != NULL) {
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
}
return p;
}
@ -75,42 +101,42 @@ static void free_items(void* p) {
uintptr_t* q = (uintptr_t*)p;
uintptr_t items = (q[0] ^ cookie);
for (uintptr_t i = 0; i < items; i++) {
if ((q[i]^cookie) != items - i) {
if ((q[i] ^ cookie) != items - i) {
fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
abort();
}
}
}
mi_free(p);
custom_free(p);
}
static void stress(intptr_t tid) {
//bench_start_thread();
uintptr_t r = tid ^ 42;
const size_t max_item = 128; // in words
const size_t max_item_retained = 10*max_item;
size_t allocs = 25*N*(tid%8 + 1); // some threads do more
size_t retain = allocs/2;
uintptr_t r = tid * 43;
const size_t max_item_shift = 5; // 128
const size_t max_item_retained_shift = max_item_shift + 2;
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
size_t retain = allocs / 2;
void** data = NULL;
size_t data_size = 0;
size_t data_top = 0;
void** retained = (void**)mi_malloc(retain*sizeof(void*));
void** retained = (void**)custom_malloc(retain * sizeof(void*));
size_t retain_top = 0;
while (allocs>0 || retain>0) {
while (allocs > 0 || retain > 0) {
if (retain == 0 || (chance(50, &r) && allocs > 0)) {
// 50%+ alloc
allocs--;
if (data_top >= data_size) {
data_size += 100000;
data = (void**)mi_realloc(data, data_size*sizeof(void*));
data = (void**)custom_realloc(data, data_size * sizeof(void*));
}
data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r);
data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
}
else {
// 25% retain
retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r);
retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
retain--;
}
if (chance(66, &r) && data_top > 0) {
@ -120,7 +146,7 @@ static void stress(intptr_t tid) {
data[idx] = NULL;
}
if (chance(25, &r) && data_top > 0) {
// 25% transfer-swap
// 25% exchange a local pointer with the (shared) transfer buffer.
size_t data_idx = pick(&r) % data_top;
size_t transfer_idx = pick(&r) % TRANSFERS;
void* p = data[data_idx];
@ -135,38 +161,54 @@ static void stress(intptr_t tid) {
for (size_t i = 0; i < data_top; i++) {
free_items(data[i]);
}
mi_free(retained);
mi_free(data);
custom_free(retained);
custom_free(data);
//bench_end_thread();
}
static void run_os_threads(size_t nthreads);
int main(int argc, char** argv) {
if (argc>=2) {
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
if (argc >= 2) {
char* end;
long n = strtol(argv[1], &end, 10);
if (n > 0) THREADS = n;
}
if (argc>=3) {
if (argc >= 3) {
char* end;
long n = (strtol(argv[2], &end, 10));
if (n > 0) N = n;
if (n > 0) SCALE = n;
}
printf("start with %i threads with a %i%% load-per-thread\n", THREADS, N);
if (argc >= 4) {
char* end;
long n = (strtol(argv[3], &end, 10));
if (n > 0) ITER = n;
}
printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
//int res = mi_reserve_huge_os_pages(4,1);
//printf("(reserve huge: %i\n)", res);
//bench_start_program();
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
mi_stats_reset();
memset((void*)transfer, 0, TRANSFERS*sizeof(void*));
run_os_threads(THREADS);
for (int i = 0; i < TRANSFERS; i++) {
free_items((void*)transfer[i]);
uintptr_t r = 43 * 43;
for (int n = 0; n < ITER; n++) {
run_os_threads(THREADS);
for (int i = 0; i < TRANSFERS; i++) {
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
void* p = atomic_exchange_ptr(&transfer[i], NULL);
free_items(p);
}
}
mi_collect(false);
#ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); }
#endif
}
#ifndef NDEBUG
mi_collect(false);
#endif
mi_collect(true);
mi_stats_print(NULL);
//bench_end_program();
return 0;
@ -183,8 +225,8 @@ static DWORD WINAPI thread_entry(LPVOID param) {
}
static void run_os_threads(size_t nthreads) {
DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD));
HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE));
DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE));
for (uintptr_t i = 0; i < nthreads; i++) {
thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
}
@ -194,16 +236,16 @@ static void run_os_threads(size_t nthreads) {
for (size_t i = 0; i < nthreads; i++) {
CloseHandle(thandles[i]);
}
free(tids);
free(thandles);
custom_free(tids);
custom_free(thandles);
}
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
#if (INTPTR_MAX == UINT32_MAX)
#if (INTPTR_MAX == UINT32_MAX)
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
#else
#else
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
#endif
#endif
}
#else
@ -216,8 +258,8 @@ static void* thread_entry(void* param) {
}
static void run_os_threads(size_t nthreads) {
pthread_t* threads = (pthread_t*)mi_malloc(nthreads*sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t)*nthreads);
pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t));
memset(threads, 0, sizeof(pthread_t) * nthreads);
//pthread_setconcurrency(nthreads);
for (uintptr_t i = 0; i < nthreads; i++) {
pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
@ -225,6 +267,7 @@ static void run_os_threads(size_t nthreads) {
for (size_t i = 0; i < nthreads; i++) {
pthread_join(threads[i], NULL);
}
custom_free(threads);
}
static void* atomic_exchange_ptr(volatile void** p, void* newval) {