improve macOS M1 performance; use interpose in combination with zones; add -fno-builtin-malloc flag when building with MI_OVERRIDE

This commit is contained in:
Daan 2021-11-02 21:54:44 -07:00
parent 751a2249c8
commit 32ee13f41e
3 changed files with 223 additions and 50 deletions

View File

@ -12,7 +12,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
@ -78,12 +78,17 @@ if(MI_OVERRIDE)
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
list(APPEND mi_sources src/alloc-override-osx.c)
list(APPEND mi_defines MI_OSX_ZONE=1)
if (NOT MI_OSX_INTERPOSE)
message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)")
endif()
endif()
if(MI_OSX_INTERPOSE)
# use interpose on macOS
message(STATUS " Use interpose to override malloc (MI_OSX_INTERPOSE=ON)")
message(STATUS " WARNING: interpose does not seem to work reliably on the M1; use -DMI_OSX_ZONE=ON instead")
list(APPEND mi_defines MI_OSX_INTERPOSE)
if (NOT MI_OSX_ZONE)
  # Interposing is enabled but zone overriding is not: point the user at the
  # option that is actually missing (MI_OSX_ZONE, not MI_OSX_INTERPOSE).
  message(STATUS " WARNING: interpose usually also needs zone overriding (use -DMI_OSX_ZONE=ON)")
endif()
endif()
endif()
endif()
@ -188,6 +193,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
else()
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
if(MI_OVERRIDE)
  # When overriding malloc, stop the compiler from treating `malloc` as a builtin.
  # `list(APPEND ...)` needs the list variable as its first argument; without
  # `mi_cflags` the flag itself was taken as the list name and never reached the compiler.
  list(APPEND mi_cflags -fno-builtin-malloc)
endif()
endif()
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
@ -364,7 +372,7 @@ if (MI_BUILD_TESTS)
target_compile_definitions(mimalloc-test-api PRIVATE ${mi_defines})
target_compile_options(mimalloc-test-api PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-api PRIVATE include)
target_link_libraries(mimalloc-test-api PRIVATE mimalloc-static ${mi_libraries})
target_link_libraries(mimalloc-test-api PRIVATE mimalloc ${mi_libraries})
add_executable(mimalloc-test-stress test/test-stress.c)
target_compile_definitions(mimalloc-test-stress PRIVATE ${mi_defines})

View File

@ -17,17 +17,20 @@ terms of the MIT license. A copy of the license can be found in the file
/* ------------------------------------------------------
Override system malloc on macOS
This is done through the malloc zone interface.
It seems we also need to interpose (see `alloc-override.c`)
or otherwise we get zone errors as there are usually
already allocations done by the time we take over the
zone. Unfortunately, that means we need to replace
the `free` with a checked free (`cfree`) impacting
performance.
It seems to be most robust in combination with interposing,
since otherwise we may get zone errors: there could already
be allocations done by the time we take over the
zone.
------------------------------------------------------ */
#include <AvailabilityMacros.h>
#include <malloc/malloc.h>
#include <string.h> // memset
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
@ -41,9 +44,7 @@ extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_im
static size_t zone_size(malloc_zone_t* zone, const void* p) {
UNUSED(zone);
if (!mi_is_in_heap_region(p))
return 0; // not our pointer, bail out
//if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out
return mi_usable_size(p);
}
@ -109,6 +110,11 @@ static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) {
zone_free(zone,p);
}
// Zone callback: reports whether `p` lies inside a mimalloc heap region,
// as determined by `mi_is_in_heap_region`. The `zone` argument is ignored.
static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) {
UNUSED(zone);
return mi_is_in_heap_region(p);
}
/* ------------------------------------------------------
Introspection members
@ -174,21 +180,6 @@ static boolean_t intro_zone_locked(malloc_zone_t* zone) {
At process start, override the default allocator
------------------------------------------------------ */
// Return the process's default malloc zone.
// Asks `malloc_get_all_zones` for the list of zones and returns its first entry;
// if that call fails or reports no zones, falls back to `malloc_default_zone()`.
// Declared with `(void)` so the definition is a proper prototype in C.
static malloc_zone_t* mi_get_default_zone(void)
{
  // The first returned zone is the real default
  malloc_zone_t** zones = NULL;
  unsigned count = 0;
  kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count);
  if (ret == KERN_SUCCESS && count > 0) {
    return zones[0];
  }
  else {
    // fallback
    return malloc_default_zone();
  }
}
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif
@ -220,33 +211,197 @@ static malloc_zone_t mi_malloc_zone = {
.batch_malloc = &zone_batch_malloc,
.batch_free = &zone_batch_free,
.introspect = &mi_introspect,
#if defined(MAC_OS_X_VERSION_10_6) && \
MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9 on OSX 10.6 to support memalign.
.version = 9,
#if defined(MAC_OS_X_VERSION_10_6) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
// switch to version 9+ on OSX 10.6 to support memalign.
.memalign = &zone_memalign,
.free_definite_size = &zone_free_definite_size,
.pressure_relief = &zone_pressure_relief,
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
.claimed_address = &zone_claimed_address,
.version = 10
#else
.version = 9
#endif
#else
.version = 4,
.version = 4
#endif
};
#ifdef __cplusplus
}
#endif
#if defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
static malloc_zone_t *mi_malloc_default_zone(void) {
#if defined(MI_OSX_INTERPOSE)
// ------------------------------------------------------
// Override the malloc_xxx and zone_xxx APIs to use only
// our mimalloc zone. Since even the loader uses malloc
// on macOS, this ensures that all allocations go through
// mimalloc (as all calls are interposed).
// ------------------------------------------------------
// Return the mimalloc zone, registering it with the system on the first call.
// Registering avoids a zone error on free
// (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>).
static inline malloc_zone_t* mi_get_default_zone(void)
{
  static bool registered;                       // static storage: starts out false
  malloc_zone_t* const zone = &mi_malloc_zone;
  if (mi_unlikely(!registered)) {
    // the flag is set before the registration call, exactly once per process
    registered = true;
    malloc_zone_register(zone);
  }
  return zone;
}
// TODO: should use the macros in alloc-override but they aren't available here.
__attribute__((used)) static struct {
const void *replacement;
const void *target;
} replace_malloc_default_zone[] __attribute__((section("__DATA, __interpose"))) = {
{ (const void*)mi_malloc_default_zone, (const void*)malloc_default_zone },
};
#endif
// Declarations of the libmalloc entry points `malloc_jumpstart` and `_malloc_fork_*`,
// so that their addresses can be used as interpose targets in `_mi_zone_interposes`.
// NOTE(review): presumably these are not declared by <malloc/malloc.h> -- confirm.
mi_decl_externc int malloc_jumpstart(uintptr_t cookie);
mi_decl_externc void _malloc_fork_prepare(void);
mi_decl_externc void _malloc_fork_parent(void);
mi_decl_externc void _malloc_fork_child(void);
// Replacement for `malloc_create_zone`: no new zone is created; both arguments
// are ignored and the single mimalloc zone is returned instead.
static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) {
UNUSED(size); UNUSED(flags);
return mi_get_default_zone();
}
// Replacement for `malloc_default_zone`: returns the mimalloc zone.
static malloc_zone_t* mi_malloc_default_zone (void) {
return mi_get_default_zone();
}
// Replacement for `malloc_default_purgeable_zone`: there is no separate purgeable
// zone here; the same mimalloc zone is returned.
static malloc_zone_t* mi_malloc_default_purgeable_zone(void) {
return mi_get_default_zone();
}
// Replacement for `malloc_destroy_zone`: intentionally a no-op; the passed zone
// is neither inspected nor released.
static void mi_malloc_destroy_zone(malloc_zone_t* zone) {
UNUSED(zone);
// nothing.
}
// Replacement for `malloc_get_all_zones`: always reports an empty zone list.
// `task` and `mr` are ignored. Each output pointer is written only if it is
// non-NULL: `*count` becomes 0 and `*addresses` becomes NULL.
// Always returns KERN_SUCCESS.
static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) {
  UNUSED(task);
  UNUSED(mr);
  if (count != NULL) {
    *count = 0;
  }
  if (addresses != NULL) {
    *addresses = NULL;
  }
  return KERN_SUCCESS;
}
// Replacement for `malloc_get_zone_name`: returns the `zone_name` field of `zone`,
// or the name of the mimalloc zone when `zone` is NULL.
static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
  if (zone == NULL) {
    return mi_malloc_zone.zone_name;
  }
  return zone->zone_name;
}
// Replacement for `malloc_set_zone_name`: a no-op; the requested name is discarded
// and no zone is modified.
static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {
UNUSED(zone); UNUSED(name);
}
// Replacement for `malloc_jumpstart`: ignores `cookie` and returns the constant 1.
// NOTE(review): the original author was unsure whether 1 or 0 signals "no error";
// confirm against the libmalloc implementation.
static int mi_malloc_jumpstart(uintptr_t cookie) {
UNUSED(cookie);
return 1; // or 0 for no error?
}
// Replacements for the `_malloc_fork_prepare` / `_malloc_fork_parent` /
// `_malloc_fork_child` hooks. All three are deliberately empty.

// Replacement for `_malloc_fork_prepare`; does nothing.
static void mi__malloc_fork_prepare(void) {
// nothing
}
// Replacement for `_malloc_fork_parent`; does nothing.
static void mi__malloc_fork_parent(void) {
// nothing
}
// Replacement for `_malloc_fork_child`; does nothing.
static void mi__malloc_fork_child(void) {
// nothing
}
// Replacement for `malloc_printf`: discards the format string and any variadic
// arguments; nothing is printed.
static void mi_malloc_printf(const char* fmt, ...) {
UNUSED(fmt);
}
// Replacement for `malloc_zone_check` (installed via MI_INTERPOSE_ZONE(zone_check)):
// performs no checking and always returns true; `zone` is ignored.
static bool zone_check(malloc_zone_t* zone) {
UNUSED(zone);
return true;
}
// Replacement for `malloc_zone_from_ptr`: the pointer is not inspected; the
// mimalloc zone is returned for every `p`.
static malloc_zone_t* zone_from_ptr(const void* p) {
UNUSED(p);
return mi_get_default_zone();
}
// No-op replacements for `malloc_zone_log`, `malloc_zone_print`,
// `malloc_zone_print_ptr_info`, `malloc_zone_register` and `malloc_zone_unregister`
// (installed via MI_INTERPOSE_ZONE). Each one ignores all of its arguments.

// Replacement for `malloc_zone_log`; does nothing.
static void zone_log(malloc_zone_t* zone, void* p) {
UNUSED(zone); UNUSED(p);
}
// Replacement for `malloc_zone_print`; does nothing.
static void zone_print(malloc_zone_t* zone, bool b) {
UNUSED(zone); UNUSED(b);
}
// Replacement for `malloc_zone_print_ptr_info`; does nothing.
static void zone_print_ptr_info(void* p) {
UNUSED(p);
}
// Replacement for `malloc_zone_register`; the zone is not recorded anywhere.
static void zone_register(malloc_zone_t* zone) {
UNUSED(zone);
}
// Replacement for `malloc_zone_unregister`; does nothing.
static void zone_unregister(malloc_zone_t* zone) {
UNUSED(zone);
}
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
// One entry of the `__DATA, __interpose` section: a pair of function addresses.
struct mi_interpose_s {
const void* replacement;  // address of the function to call instead
const void* target;       // address of the original function being replaced
};
// Build one `mi_interpose_s` initializer: `newfun` replaces `oldfun`.
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
// Replace `fun` with the function named `mi_<fun>`.
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
// Replace `malloc_<fun>` with the function named `<fun>` (e.g. `malloc_zone_free` -> `zone_free`).
#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun)
// Interpose table for the malloc zone API, placed in the `__DATA, __interpose`
// section and marked `used` so it is kept even though nothing references it.
// MI_INTERPOSE_MI entries map `X` to `mi_X`; MI_INTERPOSE_ZONE entries map
// `malloc_zone_X` to the local `zone_X` function.
__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
// zone management and miscellaneous libmalloc entry points
MI_INTERPOSE_MI(malloc_create_zone),
MI_INTERPOSE_MI(malloc_default_purgeable_zone),
MI_INTERPOSE_MI(malloc_default_zone),
MI_INTERPOSE_MI(malloc_destroy_zone),
MI_INTERPOSE_MI(malloc_get_all_zones),
MI_INTERPOSE_MI(malloc_get_zone_name),
MI_INTERPOSE_MI(malloc_jumpstart),
MI_INTERPOSE_MI(malloc_printf),
MI_INTERPOSE_MI(malloc_set_zone_name),
// fork hooks
MI_INTERPOSE_MI(_malloc_fork_child),
MI_INTERPOSE_MI(_malloc_fork_parent),
MI_INTERPOSE_MI(_malloc_fork_prepare),
// malloc_zone_xxx functions
MI_INTERPOSE_ZONE(zone_batch_free),
MI_INTERPOSE_ZONE(zone_batch_malloc),
MI_INTERPOSE_ZONE(zone_calloc),
MI_INTERPOSE_ZONE(zone_check),
MI_INTERPOSE_ZONE(zone_free),
MI_INTERPOSE_ZONE(zone_from_ptr),
MI_INTERPOSE_ZONE(zone_log),
MI_INTERPOSE_ZONE(zone_malloc),
MI_INTERPOSE_ZONE(zone_memalign),
MI_INTERPOSE_ZONE(zone_print),
MI_INTERPOSE_ZONE(zone_print_ptr_info),
MI_INTERPOSE_ZONE(zone_realloc),
MI_INTERPOSE_ZONE(zone_register),
MI_INTERPOSE_ZONE(zone_unregister),
MI_INTERPOSE_ZONE(zone_valloc)
};
#else
// ------------------------------------------------------
// hook into the zone api's without interposing
// ------------------------------------------------------
// Return the system's default malloc zone.
// The first zone reported by `malloc_get_all_zones` is taken to be the real
// default; when the query fails or yields no zones, `malloc_default_zone()`
// is used as a fallback.
static inline malloc_zone_t* mi_get_default_zone(void)
{
  malloc_zone_t** zone_list = NULL;
  unsigned zone_count = 0;
  const kern_return_t status = malloc_get_all_zones(0, NULL, (vm_address_t**)&zone_list, &zone_count);
  if (status != KERN_SUCCESS || zone_count == 0) {
    // fallback
    return malloc_default_zone();
  }
  return zone_list[0];
}
#if defined(__clang__)
__attribute__((constructor(0)))
@ -287,5 +442,6 @@ static void _mi_macos_override_malloc() {
}
}
#endif // MI_OSX_INTERPOSE
#endif // MI_MALLOC_OVERRIDE

View File

@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__APPLE__) && !defined(MI_OSX_INTERPOSE)))
#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
// ------------------------------------------------------
// Override system malloc
@ -41,7 +41,12 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_FORWARD02(fun,x,y) { fun(x,y); }
#endif
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
#include <malloc/malloc.h>
// Declarations of `vfree`, `malloc_size` and `malloc_good_size`, which are named
// as interpose targets in the `_mi_interposes` table in this file.
mi_decl_externc void vfree(void* p);
mi_decl_externc size_t malloc_size(const void* p);
mi_decl_externc size_t malloc_good_size(size_t size);
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
@ -50,6 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
};
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc),
@ -61,21 +67,24 @@ terms of the MIT license. A copy of the license can be found in the file
MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf),
MI_INTERPOSE_MI(valloc),
MI_INTERPOSE_MI(malloc_size),
MI_INTERPOSE_MI(malloc_good_size),
MI_INTERPOSE_MI(aligned_alloc),
#ifndef MI_OSX_ZONE
// some code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
// sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
MI_INTERPOSE_FUN(vfree,mi_cfree),
#else
// We interpose malloc_default_zone in alloc-override-osx.c
// we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
MI_INTERPOSE_MI(free),
MI_INTERPOSE_FUN(vfree,mi_free),
#endif
// some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
};
#elif defined(_MSC_VER)
// cannot override malloc unless using a dll.
// we just override new/delete which does work in a static library.
#else
// On all other systems forward to our API
// On all other systems forward to our API
// Definitions of the standard `malloc`, `calloc` and `realloc` whose bodies are
// supplied by the MI_FORWARD* macros with the corresponding `mi_` function and
// the arguments in the same order.
// NOTE(review): presumably MI_FORWARD1/2 expand to a body that returns the result
// of the named function -- their definitions are not visible here; confirm.
void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
@ -123,7 +132,7 @@ terms of the MIT license. A copy of the license can be found in the file
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
#endif
#elif (defined(__GNUC__) || defined(__clang__))
#elif (defined(__GNUC__) || defined(__clang__)) && !defined(MI_OSX_ZONE)
// ------------------------------------------------------
// Override by defining the mangled C++ names of the operators (as
// used by GCC and CLang).