Improve macOS M1 performance; use interposing in combination with malloc zones; add the -fno-builtin-malloc flag when building with MI_OVERRIDE
This commit is contained in:
parent
751a2249c8
commit
32ee13f41e
@ -12,7 +12,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by
|
||||
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
|
||||
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
|
||||
option(MI_SEE_ASM "Generate assembly files" OFF)
|
||||
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" OFF)
|
||||
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
|
||||
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
|
||||
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
|
||||
option(MI_BUILD_SHARED "Build shared library" ON)
|
||||
@ -78,12 +78,17 @@ if(MI_OVERRIDE)
|
||||
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
|
||||
list(APPEND mi_sources src/alloc-override-osx.c)
|
||||
list(APPEND mi_defines MI_OSX_ZONE=1)
|
||||
if (NOT MI_OSX_INTERPOSE)
|
||||
message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)")
|
||||
endif()
|
||||
endif()
|
||||
if(MI_OSX_INTERPOSE)
|
||||
# use interpose on macOS
|
||||
message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)")
|
||||
message(STATUS " WARNING: interpose does not seem to work reliably on the M1; use -DMI_OSX_ZONE=ON instead")
|
||||
list(APPEND mi_defines MI_OSX_INTERPOSE)
|
||||
if (NOT MI_OSX_ZONE)
  # Interposing is enabled here but zone overriding is not, so the hint must
  # name the zone option (the interpose option is already ON in this branch).
  message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_ZONE=ON)")
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
@ -188,6 +193,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
|
||||
else()
|
||||
list(APPEND mi_cflags -ftls-model=initial-exec)
|
||||
endif()
|
||||
if(MI_OVERRIDE)
  # Add -fno-builtin-malloc to the compile flags when overriding malloc.
  # list(APPEND) takes the list variable as its first argument; without
  # `mi_cflags` the call appends nothing and the flag is silently dropped.
  list(APPEND mi_cflags -fno-builtin-malloc)
endif()
|
||||
endif()
|
||||
|
||||
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
|
||||
@ -364,7 +372,7 @@ if (MI_BUILD_TESTS)
|
||||
target_compile_definitions(mimalloc-test-api PRIVATE ${mi_defines})
|
||||
target_compile_options(mimalloc-test-api PRIVATE ${mi_cflags})
|
||||
target_include_directories(mimalloc-test-api PRIVATE include)
|
||||
target_link_libraries(mimalloc-test-api PRIVATE mimalloc-static ${mi_libraries})
|
||||
target_link_libraries(mimalloc-test-api PRIVATE mimalloc ${mi_libraries})
|
||||
|
||||
add_executable(mimalloc-test-stress test/test-stress.c)
|
||||
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})
|
||||
|
@ -17,17 +17,20 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
/* ------------------------------------------------------
|
||||
Override system malloc on macOS
|
||||
This is done through the malloc zone interface.
|
||||
It seems we also need to interpose (see `alloc-override.c`)
|
||||
or otherwise we get zone errors as there are usually
|
||||
already allocations done by the time we take over the
|
||||
zone. Unfortunately, that means we need to replace
|
||||
the `free` with a checked free (`cfree`) impacting
|
||||
performance.
|
||||
It seems to be most robust in combination with interposing
|
||||
though or otherwise we may get zone errors as there could
|
||||
be allocations done by the time we take over the
|
||||
zone.
|
||||
------------------------------------------------------ */
|
||||
|
||||
#include <AvailabilityMacros.h>
|
||||
#include <malloc/malloc.h>
|
||||
#include <string.h> // memset
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && \
|
||||
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
|
||||
@ -41,9 +44,7 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
|
||||
|
||||
static size_t zone_size(malloc_zone_t* zone, const void* p) {
|
||||
UNUSED(zone);
|
||||
if (!mi_is_in_heap_region(p))
|
||||
return 0; // not our pointer, bail out
|
||||
|
||||
//if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out
|
||||
return mi_usable_size(p);
|
||||
}
|
||||
|
||||
@ -109,6 +110,11 @@ static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
|
||||
zone_free(zone,p);
|
||||
}
|
||||
|
||||
static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) {
|
||||
UNUSED(zone);
|
||||
return mi_is_in_heap_region(p);
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------------------------------------
|
||||
Introspection members
|
||||
@ -174,21 +180,6 @@ static boolean_t intro_zone_locked(malloc_zone_t* zone) {
|
||||
At process start, override the default allocator
|
||||
------------------------------------------------------ */
|
||||
|
||||
static malloc_zone_t* mi_get_default_zone()
|
||||
{
|
||||
// The first returned zone is the real default
|
||||
malloc_zone_t** zones = NULL;
|
||||
unsigned count = 0;
|
||||
kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
|
||||
if (ret == KERN_SUCCESS && count > 0) {
|
||||
return zones[0];
|
||||
}
|
||||
else {
|
||||
// fallback
|
||||
return malloc_default_zone();
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && !defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
|
||||
#endif
|
||||
@ -220,33 +211,197 @@ static malloc_zone_t mi_malloc_zone = {
|
||||
.batch_malloc = &zone_batch_malloc,
|
||||
.batch_free = &zone_batch_free,
|
||||
.introspect = &mi_introspect,
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && \
|
||||
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
|
||||
// switch to version 9 on OSX 10.6 to support memalign.
|
||||
.version = 9,
|
||||
#if defined(MAC_OS_X_VERSION_10_6) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
|
||||
// switch to version 9+ on OSX 10.6 to support memalign.
|
||||
.memalign = &zone_memalign,
|
||||
.free_definite_size = &zone_free_definite_size,
|
||||
.pressure_relief = &zone_pressure_relief,
|
||||
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
|
||||
.claimed_address = &zone_claimed_address,
|
||||
.version = 10
|
||||
#else
|
||||
.version = 9
|
||||
#endif
|
||||
#else
|
||||
.version = 4,
|
||||
.version = 4
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
|
||||
|
||||
static malloc_zone_t *mi_malloc_default_zone(void) {
|
||||
#if defined(MI_OSX_INTERPOSE)
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Override the malloc_xxx and zone_xxx APIs to use only
|
||||
// our mimalloc zone. Since even the loader uses malloc
|
||||
// on macOS, this ensures that all allocations go through
|
||||
// mimalloc (as all calls are interposed).
|
||||
// ------------------------------------------------------
|
||||
|
||||
static inline malloc_zone_t* mi_get_default_zone(void)
|
||||
{
|
||||
static bool init;
|
||||
if (mi_unlikely(!init)) {
|
||||
init = true;
|
||||
malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>)
|
||||
}
|
||||
return &mi_malloc_zone;
|
||||
}
|
||||
// TODO: should use the macros in alloc-override but they aren't available here.
|
||||
__attribute__((used)) static struct {
|
||||
const void *replacement;
|
||||
const void *target;
|
||||
} replace_malloc_default_zone[] __attribute__((section("__DATA, __interpose"))) = {
|
||||
{ (const void*)mi_malloc_default_zone, (const void*)malloc_default_zone },
|
||||
};
|
||||
#endif
|
||||
|
||||
mi_decl_externc int malloc_jumpstart(uintptr_t cookie);
|
||||
mi_decl_externc void _malloc_fork_prepare(void);
|
||||
mi_decl_externc void _malloc_fork_parent(void);
|
||||
mi_decl_externc void _malloc_fork_child(void);
|
||||
|
||||
|
||||
static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) {
|
||||
UNUSED(size); UNUSED(flags);
|
||||
return mi_get_default_zone();
|
||||
}
|
||||
|
||||
static malloc_zone_t* mi_malloc_default_zone (void) {
|
||||
return mi_get_default_zone();
|
||||
}
|
||||
|
||||
static malloc_zone_t* mi_malloc_default_purgeable_zone(void) {
|
||||
return mi_get_default_zone();
|
||||
}
|
||||
|
||||
static void mi_malloc_destroy_zone(malloc_zone_t* zone) {
|
||||
UNUSED(zone);
|
||||
// nothing.
|
||||
}
|
||||
|
||||
static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) {
|
||||
UNUSED(task); UNUSED(mr);
|
||||
if (addresses != NULL) *addresses = NULL;
|
||||
if (count != NULL) *count = 0;
|
||||
return KERN_SUCCESS;
|
||||
}
|
||||
|
||||
static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
|
||||
return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name);
|
||||
}
|
||||
|
||||
static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {
|
||||
UNUSED(zone); UNUSED(name);
|
||||
}
|
||||
|
||||
static int mi_malloc_jumpstart(uintptr_t cookie) {
|
||||
UNUSED(cookie);
|
||||
return 1; // or 0 for no error?
|
||||
}
|
||||
|
||||
static void mi__malloc_fork_prepare(void) {
|
||||
// nothing
|
||||
}
|
||||
static void mi__malloc_fork_parent(void) {
|
||||
// nothing
|
||||
}
|
||||
static void mi__malloc_fork_child(void) {
|
||||
// nothing
|
||||
}
|
||||
|
||||
static void mi_malloc_printf(const char* fmt, ...) {
|
||||
UNUSED(fmt);
|
||||
}
|
||||
|
||||
static bool zone_check(malloc_zone_t* zone) {
|
||||
UNUSED(zone);
|
||||
return true;
|
||||
}
|
||||
|
||||
static malloc_zone_t* zone_from_ptr(const void* p) {
|
||||
UNUSED(p);
|
||||
return mi_get_default_zone();
|
||||
}
|
||||
|
||||
static void zone_log(malloc_zone_t* zone, void* p) {
|
||||
UNUSED(zone); UNUSED(p);
|
||||
}
|
||||
|
||||
static void zone_print(malloc_zone_t* zone, bool b) {
|
||||
UNUSED(zone); UNUSED(b);
|
||||
}
|
||||
|
||||
static void zone_print_ptr_info(void* p) {
|
||||
UNUSED(p);
|
||||
}
|
||||
|
||||
static void zone_register(malloc_zone_t* zone) {
|
||||
UNUSED(zone);
|
||||
}
|
||||
|
||||
static void zone_unregister(malloc_zone_t* zone) {
|
||||
UNUSED(zone);
|
||||
}
|
||||
|
||||
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
|
||||
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
|
||||
struct mi_interpose_s {
|
||||
const void* replacement;
|
||||
const void* target;
|
||||
};
|
||||
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
|
||||
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
|
||||
#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun)
|
||||
__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) =
|
||||
{
|
||||
|
||||
MI_INTERPOSE_MI(malloc_create_zone),
|
||||
MI_INTERPOSE_MI(malloc_default_purgeable_zone),
|
||||
MI_INTERPOSE_MI(malloc_default_zone),
|
||||
MI_INTERPOSE_MI(malloc_destroy_zone),
|
||||
MI_INTERPOSE_MI(malloc_get_all_zones),
|
||||
MI_INTERPOSE_MI(malloc_get_zone_name),
|
||||
MI_INTERPOSE_MI(malloc_jumpstart),
|
||||
MI_INTERPOSE_MI(malloc_printf),
|
||||
MI_INTERPOSE_MI(malloc_set_zone_name),
|
||||
MI_INTERPOSE_MI(_malloc_fork_child),
|
||||
MI_INTERPOSE_MI(_malloc_fork_parent),
|
||||
MI_INTERPOSE_MI(_malloc_fork_prepare),
|
||||
|
||||
MI_INTERPOSE_ZONE(zone_batch_free),
|
||||
MI_INTERPOSE_ZONE(zone_batch_malloc),
|
||||
MI_INTERPOSE_ZONE(zone_calloc),
|
||||
MI_INTERPOSE_ZONE(zone_check),
|
||||
MI_INTERPOSE_ZONE(zone_free),
|
||||
MI_INTERPOSE_ZONE(zone_from_ptr),
|
||||
MI_INTERPOSE_ZONE(zone_log),
|
||||
MI_INTERPOSE_ZONE(zone_malloc),
|
||||
MI_INTERPOSE_ZONE(zone_memalign),
|
||||
MI_INTERPOSE_ZONE(zone_print),
|
||||
MI_INTERPOSE_ZONE(zone_print_ptr_info),
|
||||
MI_INTERPOSE_ZONE(zone_realloc),
|
||||
MI_INTERPOSE_ZONE(zone_register),
|
||||
MI_INTERPOSE_ZONE(zone_unregister),
|
||||
MI_INTERPOSE_ZONE(zone_valloc)
|
||||
};
|
||||
|
||||
|
||||
#else
|
||||
|
||||
// ------------------------------------------------------
|
||||
// hook into the zone APIs without interposing
|
||||
// ------------------------------------------------------
|
||||
|
||||
static inline malloc_zone_t* mi_get_default_zone(void)
|
||||
{
|
||||
// The first returned zone is the real default
|
||||
malloc_zone_t** zones = NULL;
|
||||
unsigned count = 0;
|
||||
kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
|
||||
if (ret == KERN_SUCCESS && count > 0) {
|
||||
return zones[0];
|
||||
}
|
||||
else {
|
||||
// fallback
|
||||
return malloc_default_zone();
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
__attribute__((constructor(0)))
|
||||
@ -287,5 +442,6 @@ static void _mi_macos_override_malloc() {
|
||||
}
|
||||
|
||||
}
|
||||
#endif // MI_OSX_INTERPOSE
|
||||
|
||||
#endif // MI_MALLOC_OVERRIDE
|
||||
|
@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
|
||||
#endif
|
||||
|
||||
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__APPLE__) && !defined(MI_OSX_INTERPOSE)))
|
||||
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Override system malloc
|
||||
@ -41,7 +41,12 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#define MI_FORWARD02(fun,x,y) { fun(x,y); }
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
|
||||
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
|
||||
#include <malloc/malloc.h>
|
||||
mi_decl_externc void vfree(void* p);
|
||||
mi_decl_externc size_t malloc_size(const void* p);
|
||||
mi_decl_externc size_t malloc_good_size(size_t size);
|
||||
|
||||
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
|
||||
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
|
||||
struct mi_interpose_s {
|
||||
@ -50,6 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
};
|
||||
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
|
||||
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
|
||||
|
||||
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
|
||||
{
|
||||
MI_INTERPOSE_MI(malloc),
|
||||
@ -61,21 +67,24 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
MI_INTERPOSE_MI(posix_memalign),
|
||||
MI_INTERPOSE_MI(reallocf),
|
||||
MI_INTERPOSE_MI(valloc),
|
||||
MI_INTERPOSE_MI(malloc_size),
|
||||
MI_INTERPOSE_MI(malloc_good_size),
|
||||
MI_INTERPOSE_MI(aligned_alloc),
|
||||
#ifndef MI_OSX_ZONE
|
||||
// some code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
|
||||
// sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
|
||||
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
|
||||
MI_INTERPOSE_FUN(vfree,mi_cfree),
|
||||
#else
|
||||
// We interpose malloc_default_zone in alloc-override-osx.c
|
||||
// we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
|
||||
MI_INTERPOSE_MI(free),
|
||||
MI_INTERPOSE_FUN(vfree,mi_free),
|
||||
#endif
|
||||
// some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
|
||||
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
|
||||
};
|
||||
#elif defined(_MSC_VER)
|
||||
// cannot override malloc unless using a dll.
|
||||
// we just override new/delete which does work in a static library.
|
||||
#else
|
||||
// On all other systems forward to our API
|
||||
// On all other systems forward to our API
|
||||
void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
|
||||
void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
|
||||
void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
|
||||
@ -123,7 +132,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
|
||||
#endif
|
||||
|
||||
#elif (defined(__GNUC__) || defined(__clang__))
|
||||
#elif (defined(__GNUC__) || defined(__clang__)) && !defined(MI_OSX_ZONE)
|
||||
// ------------------------------------------------------
|
||||
// Override by defining the mangled C++ names of the operators (as
|
||||
// used by GCC and CLang).
|
||||
|
Loading…
Reference in New Issue
Block a user