From dccffea66286dfb16e642aef3fea7babee7038e3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 14 Nov 2019 11:01:05 -0800 Subject: [PATCH 1/5] fix pr #173 by @zerodefect to use case-insensitive matching of the build type; also use MI_DEBUG_FULL option (instead of MI_CHECK_FULL) --- CMakeLists.txt | 74 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b455881..aa9c126f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,15 +6,14 @@ set(CMAKE_CXX_STANDARD 17) option(MI_OVERRIDE "Override the standard malloc interface" ON) option(MI_INTERPOSE "Use interpose to override standard malloc on macOS" ON) -option(MI_SEE_ASM "Generate assembly files" OFF) -option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode" OFF) -option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) -option(MI_SECURE "Use security mitigations (like guard pages and randomization)" OFF) +option(MI_DEBUG_FULL "Use full internal heap invariant checking in DEBUG mode" OFF) +option(MI_SECURE "Use security mitigations (like guard pages, allocation randomization, and free-list corruption detection)" OFF) option(MI_SECURE_FULL "Use full security mitigations, may be more expensive (includes double-free mitigation)" OFF) +option(MI_USE_CXX "Use the C++ compiler to compile the library" OFF) +option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF) option(MI_BUILD_TESTS "Build test executables" ON) - -set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") +option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) set(mi_sources src/stats.c @@ -29,29 +28,33 @@ set(mi_sources src/options.c src/init.c) -# Set default build type +# ----------------------------------------------------------------------------- +# Converience: set default build type depending on the build directory +# ----------------------------------------------------------------------------- + if (NOT CMAKE_BUILD_TYPE) - if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$") - message(STATUS "No build type selected, default to *** Debug ***") + if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL MATCHES "ON") + message(STATUS "No build type selected, default to: Debug") set(CMAKE_BUILD_TYPE "Debug") else() - message(STATUS "No build type selected, default to *** Release ***") + message(STATUS "No build type selected, default to: Release") set(CMAKE_BUILD_TYPE "Release") endif() -else() - message(STATUS "Build type specified as *** ${CMAKE_BUILD_TYPE} ***") endif() if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") + message(STATUS "Default to secure build") set(MI_SECURE "ON") endif() +# ----------------------------------------------------------------------------- +# Process options +# ----------------------------------------------------------------------------- + if(CMAKE_C_COMPILER_ID MATCHES "MSVC") set(MI_USE_CXX "ON") endif() - -# Options if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) @@ -84,7 +87,12 @@ if(MI_SEE_ASM MATCHES "ON") endif() if(MI_CHECK_FULL MATCHES "ON") - message(STATUS "Set debug level to full invariant checking (MI_CHECK_FULL=ON)") + message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead") + set(MI_DEBUG_FULL "ON") +endif() + +if(MI_DEBUG_FULL MATCHES "ON") + message(STATUS "Set debug level to full invariant checking (MI_DEBUG_FULL=ON)") list(APPEND mi_defines MI_DEBUG=3) # full invariant checking endif() @@ -109,19 +117,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") endif() endif() -if(NOT(CMAKE_BUILD_TYPE MATCHES "Release|release|RelWithDebInfo|relwithdebinfo")) - string(TOLOWER "${CMAKE_BUILD_TYPE}" build_type) - set(mi_basename "mimalloc-${build_type}") -else() - if(MI_SECURE MATCHES "ON") - set(mi_basename "mimalloc-secure") - else() - set(mi_basename "mimalloc") - endif() -endif() -message(STATUS "Output library name : ${mi_basename}") -message(STATUS "Installation directory: ${mi_install_dir}") - # extra needed libraries if(WIN32) list(APPEND mi_libraries psapi shell32 user32) @@ -134,9 +129,28 @@ else() endif() # ----------------------------------------------------------------------------- -# Main targets +# Install and output names # ----------------------------------------------------------------------------- +set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}") +if(MI_SECURE MATCHES "ON") + set(mi_basename "mimalloc-secure") +else() + set(mi_basename "mimalloc") +endif() +string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) +if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$")) + set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version +endif() +message(STATUS "") +message(STATUS "Library base name: ${mi_basename}") +message(STATUS "Build type : ${CMAKE_BUILD_TYPE_LC}") +message(STATUS "Install directory: ${mi_install_dir}") +message(STATUS "") + +# ----------------------------------------------------------------------------- +# Main targets +# ----------------------------------------------------------------------------- # shared library add_library(mimalloc SHARED ${mi_sources}) @@ -238,7 +252,7 @@ endif() if (MI_OVERRIDE MATCHES "ON") target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE) if(NOT WIN32) - # It is only possible to override malloc on Windows when building as a DLL. (src/alloc-override.c) + # It is only possible to override malloc on Windows when building as a DLL. target_compile_definitions(mimalloc-static PRIVATE MI_MALLOC_OVERRIDE) target_compile_definitions(mimalloc-obj PRIVATE MI_MALLOC_OVERRIDE) endif() From 8637f113d5ed817fa93e584d716d2b5c91ca723f Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 15 Nov 2019 14:09:17 -0800 Subject: [PATCH 2/5] improve test-stress to run multiple iterations --- test/test-stress.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index bb428072..4b6ec22d 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -18,7 +18,8 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor +static int N = 20; // scaling factor +static int ITER = 10; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int N = 100; // scaling factor @@ -159,14 +160,17 @@ int main(int argc, char** argv) { //bench_start_program(); mi_stats_reset(); - memset((void*)transfer, 0, TRANSFERS*sizeof(void*)); - run_os_threads(THREADS); - for (int i = 0; i < TRANSFERS; i++) { - free_items((void*)transfer[i]); + for (int i = 0; i < ITER; i++) { + memset((void*)transfer, 0, TRANSFERS * sizeof(void*)); + run_os_threads(THREADS); + for (int i = 0; i < TRANSFERS; i++) { + free_items((void*)transfer[i]); + } } - #ifndef NDEBUG +#ifndef NDEBUG mi_collect(false); - #endif +#endif + mi_stats_print(NULL); //bench_end_program(); return 0; From fd3ce5dc7d22bf4155588ac2755a98e4a405303f Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 15 Nov 2019 16:28:11 -0800 Subject: [PATCH 3/5] improve stress test --- ide/vs2019/mimalloc-test-stress.vcxproj | 4 +- test/test-stress.c | 86 ++++++++++++++++--------- 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/ide/vs2019/mimalloc-test-stress.vcxproj b/ide/vs2019/mimalloc-test-stress.vcxproj index afbb6666..ef7ab357 100644 --- a/ide/vs2019/mimalloc-test-stress.vcxproj +++ b/ide/vs2019/mimalloc-test-stress.vcxproj @@ -149,8 +149,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/test/test-stress.c b/test/test-stress.c index 4b6ec22d..b6ceaa0a 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -6,7 +6,8 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload - but uses a random linear size distribution. Do not use this test as a benchmark! + but uses a random linear size distribution. Timing can also depend on + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -18,16 +19,31 @@ terms of the MIT license. // argument defaults static int THREADS = 32; // more repeatable if THREADS <= #processors -static int N = 20; // scaling factor -static int ITER = 10; // N full iterations re-creating all threads +static int SCALE = 12; // scaling factor +static int ITER = 50; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors -// static int N = 100; // scaling factor +// static int SCALE = 100; // scaling factor +static bool allow_large_objects = true; // allow very large objects? +static size_t use_one_size = 0; // use single object size of N uintptr_t? + + +#ifdef USE_STD_MALLOC +#define custom_malloc(s) malloc(s) +#define custom_realloc(p,s) realloc(p,s) +#define custom_free(p) free(p) +#else +#define custom_malloc(s) mi_malloc(s) +#define custom_realloc(p,s) mi_realloc(p,s) +#define custom_free(p) mi_free(p) +#endif + +// transfer pointer between threads #define TRANSFERS (1000) - static volatile void* transfer[TRANSFERS]; + #if (UINTPTR_MAX != UINT32_MAX) const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; #else @@ -64,10 +80,17 @@ static bool chance(size_t perc, random_t r) { } static void* alloc_items(size_t items, random_t r) { - if (chance(1, r)) items *= 100; // 1% huge objects; + if (chance(1, r)) { + if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant + else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge + else items *= 10; // 1% large objects; + } if (items==40) items++; // pthreads uses that size for stack increases - uintptr_t* p = (uintptr_t*)mi_malloc(items*sizeof(uintptr_t)); - for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t)); + uintptr_t* p = (uintptr_t*)custom_malloc(items*sizeof(uintptr_t)); + if (p != NULL) { + for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; + } return p; } @@ -82,7 +105,7 @@ static void free_items(void* p) { } } } - mi_free(p); + custom_free(p); } @@ -91,12 +114,12 @@ static void stress(intptr_t tid) { uintptr_t r = tid ^ 42; const size_t max_item = 128; // in words const size_t max_item_retained = 10*max_item; - size_t allocs = 25*N*(tid%8 + 1); // some threads do more + size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more size_t retain = allocs/2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)mi_malloc(retain*sizeof(void*)); + void** retained = (void**)custom_malloc(retain*sizeof(void*)); size_t retain_top = 0; while (allocs>0 || retain>0) { @@ -105,7 +128,7 @@ static void stress(intptr_t tid) { allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)mi_realloc(data, data_size*sizeof(void*)); + data = (void**)custom_realloc(data, data_size*sizeof(void*)); } data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); } @@ -121,7 +144,7 @@ static void stress(intptr_t tid) { data[idx] = NULL; } if (chance(25, &r) && data_top > 0) { - // 25% transfer-swap + // 25% exchange a local pointer with the (shared) transfer buffer. size_t data_idx = pick(&r) % data_top; size_t transfer_idx = pick(&r) % TRANSFERS; void* p = data[data_idx]; @@ -136,8 +159,8 @@ static void stress(intptr_t tid) { for (size_t i = 0; i < data_top; i++) { free_items(data[i]); } - mi_free(retained); - mi_free(data); + custom_free(retained); + custom_free(data); //bench_end_thread(); } @@ -152,25 +175,29 @@ int main(int argc, char** argv) { if (argc>=3) { char* end; long n = (strtol(argv[2], &end, 10)); - if (n > 0) N = n; + if (n > 0) SCALE = n; } - printf("start with %i threads with a %i%% load-per-thread\n", THREADS, N); + printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); - //bench_start_program(); + //bench_start_program(); + + // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - for (int i = 0; i < ITER; i++) { - memset((void*)transfer, 0, TRANSFERS * sizeof(void*)); + uintptr_t r = 43; + for (int n = 0; n < ITER; n++) { run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { - free_items((void*)transfer[i]); + if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers + void* p = atomic_exchange_ptr(&transfer[i], NULL); + free_items(p); + } } } -#ifndef NDEBUG - mi_collect(false); -#endif + mi_collect(false); + mi_collect(true); mi_stats_print(NULL); //bench_end_program(); return 0; @@ -187,8 +214,8 @@ static DWORD WINAPI thread_entry(LPVOID param) { } static void run_os_threads(size_t nthreads) { - DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD)); - HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE)); + DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD)); + HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE)); for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } @@ -198,8 +225,8 @@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { CloseHandle(thandles[i]); } - free(tids); - free(thandles); + custom_free(tids); + custom_free(thandles); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { @@ -220,7 +247,7 @@ static void* thread_entry(void* param) { } static void run_os_threads(size_t nthreads) { - pthread_t* threads = (pthread_t*)mi_malloc(nthreads*sizeof(pthread_t)); + pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t)*nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { @@ -229,6 +256,7 @@ static void run_os_threads(size_t nthreads) { for (size_t i = 0; i < nthreads; i++) { pthread_join(threads[i], NULL); } + custom_free(threads); } static void* atomic_exchange_ptr(volatile void** p, void* newval) { From 94bfb4772575d43bb11247b957ee5c3741a97a1a Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 12:59:26 -0800 Subject: [PATCH 4/5] update stress test for more realisitic size distribution --- test/test-stress.c | 87 ++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index b6ceaa0a..6b2fb8c4 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -7,7 +7,7 @@ terms of the MIT license. /* This is a stress test for the allocator, using multiple threads and transferring objects between threads. This is not a typical workload but uses a random linear size distribution. Timing can also depend on - (random) thread scheduling. Do not use this test as a benchmark! + (random) thread scheduling. Do not use this test as a benchmark! */ #include @@ -17,10 +17,12 @@ terms of the MIT license. #include #include +// > mimalloc-test-stress [THREADS] [SCALE] [ITER] +// // argument defaults -static int THREADS = 32; // more repeatable if THREADS <= #processors -static int SCALE = 12; // scaling factor -static int ITER = 50; // N full iterations re-creating all threads +static int THREADS = 32; // more repeatable if THREADS <= #processors +static int SCALE = 50; // scaling factor +static int ITER = 10; // N full iterations re-creating all threads // static int THREADS = 8; // more repeatable if THREADS <= #processors // static int SCALE = 100; // scaling factor @@ -56,21 +58,21 @@ typedef uintptr_t* random_t; static uintptr_t pick(random_t r) { uintptr_t x = *r; - #if (UINTPTR_MAX > UINT32_MAX) - // by Sebastiano Vigna, see: +#if (UINTPTR_MAX > UINT32_MAX) + // by Sebastiano Vigna, see: x ^= x >> 30; x *= 0xbf58476d1ce4e5b9UL; x ^= x >> 27; x *= 0x94d049bb133111ebUL; x ^= x >> 31; - #else - // by Chris Wellons, see: +#else + // by Chris Wellons, see: x ^= x >> 16; x *= 0x7feb352dUL; x ^= x >> 15; x *= 0x846ca68bUL; x ^= x >> 16; - #endif +#endif *r = x; return x; } @@ -81,13 +83,13 @@ static bool chance(size_t perc, random_t r) { static void* alloc_items(size_t items, random_t r) { if (chance(1, r)) { - if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant - else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge - else items *= 10; // 1% large objects; + if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant + else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge + else items *= 100; // 1% large objects; } - if (items==40) items++; // pthreads uses that size for stack increases - if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t)); - uintptr_t* p = (uintptr_t*)custom_malloc(items*sizeof(uintptr_t)); + if (items == 40) items++; // pthreads uses that size for stack increases + if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t)); + uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t)); if (p != NULL) { for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie; } @@ -99,7 +101,7 @@ static void free_items(void* p) { uintptr_t* q = (uintptr_t*)p; uintptr_t items = (q[0] ^ cookie); for (uintptr_t i = 0; i < items; i++) { - if ((q[i]^cookie) != items - i) { + if ((q[i] ^ cookie) != items - i) { fprintf(stderr, "memory corruption at block %p at %zu\n", p, i); abort(); } @@ -111,30 +113,30 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = tid ^ 42; - const size_t max_item = 128; // in words - const size_t max_item_retained = 10*max_item; - size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more - size_t retain = allocs/2; + uintptr_t r = tid * 43; + const size_t max_item_shift = 5; // 128 + const size_t max_item_retained_shift = max_item_shift + 2; + size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more + size_t retain = allocs / 2; void** data = NULL; size_t data_size = 0; size_t data_top = 0; - void** retained = (void**)custom_malloc(retain*sizeof(void*)); + void** retained = (void**)custom_malloc(retain * sizeof(void*)); size_t retain_top = 0; - while (allocs>0 || retain>0) { + while (allocs > 0 || retain > 0) { if (retain == 0 || (chance(50, &r) && allocs > 0)) { // 50%+ alloc allocs--; if (data_top >= data_size) { data_size += 100000; - data = (void**)custom_realloc(data, data_size*sizeof(void*)); + data = (void**)custom_realloc(data, data_size * sizeof(void*)); } - data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r); + data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); } else { // 25% retain - retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r); + retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r); retain--; } if (chance(66, &r) && data_top > 0) { @@ -167,36 +169,45 @@ static void stress(intptr_t tid) { static void run_os_threads(size_t nthreads); int main(int argc, char** argv) { - if (argc>=2) { + // > mimalloc-test-stress [THREADS] [SCALE] [ITER] + if (argc >= 2) { char* end; long n = strtol(argv[1], &end, 10); if (n > 0) THREADS = n; } - if (argc>=3) { + if (argc >= 3) { char* end; long n = (strtol(argv[2], &end, 10)); if (n > 0) SCALE = n; } - printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE); + if (argc >= 4) { + char* end; + long n = (strtol(argv[3], &end, 10)); + if (n > 0) ITER = n; + } + printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); - //bench_start_program(); + //bench_start_program(); // Run ITER full iterations where half the objects in the transfer buffer survive to the next round. mi_stats_reset(); - uintptr_t r = 43; + uintptr_t r = 43 * 43; for (int n = 0; n < ITER; n++) { run_os_threads(THREADS); for (int i = 0; i < TRANSFERS; i++) { - if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers + if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers void* p = atomic_exchange_ptr(&transfer[i], NULL); free_items(p); } } + mi_collect(false); +#ifndef NDEBUG + if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } +#endif } - mi_collect(false); mi_collect(true); mi_stats_print(NULL); //bench_end_program(); @@ -230,11 +241,11 @@ static void run_os_threads(size_t nthreads) { } static void* atomic_exchange_ptr(volatile void** p, void* newval) { - #if (INTPTR_MAX == UINT32_MAX) +#if (INTPTR_MAX == UINT32_MAX) return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval); - #else +#else return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval); - #endif +#endif } #else @@ -247,8 +258,8 @@ static void* thread_entry(void* param) { } static void run_os_threads(size_t nthreads) { - pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t)); - memset(threads, 0, sizeof(pthread_t)*nthreads); + pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t)); + memset(threads, 0, sizeof(pthread_t) * nthreads); //pthread_setconcurrency(nthreads); for (uintptr_t i = 0; i < nthreads; i++) { pthread_create(&threads[i], NULL, &thread_entry, (void*)i); From 4d4a2885f5ef5d0b3db8de149b472380f495e729 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 20 Nov 2019 13:19:17 -0800 Subject: [PATCH 5/5] use atomic read/write on the page->heap field where concurrent interaction is possible --- src/alloc.c | 2 +- src/page-queue.c | 6 +++--- src/page.c | 21 ++++++++++++++------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index d2319f82..c4863115 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -235,7 +235,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc } else { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = page->heap; + mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) diff --git a/src/page-queue.c b/src/page-queue.c index 4af70b50..95443a69 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,7 +260,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { page->heap->page_count--; page->next = NULL; page->prev = NULL; - page->heap = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } @@ -274,7 +274,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - page->heap = heap; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - page->heap = heap; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); count++; } diff --git a/src/page.c b/src/page.c index aaf1cb91..a8115d27 100644 --- a/src/page.c +++ b/src/page.c @@ -343,18 +343,24 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page,MI_NEVER_DELAYED_FREE); +#if MI_DEBUG > 1 + mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); +#endif + + // remove from our page list + mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_page_queue_remove(pq, page); + + // page is no longer associated with our heap + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + #if MI_DEBUG>1 // check there are no references left.. - for (mi_block_t* block = (mi_block_t*)page->heap->thread_delayed_free; block != NULL; block = mi_block_nextx(page->heap->cookie,block)) { + for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap->cookie, block)) { mi_assert_internal(_mi_ptr_page(block) != page); } #endif - // and then remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; - mi_page_queue_remove(pq, page); - // and abandon it mi_assert_internal(page->heap == NULL); _mi_segment_page_abandon(page,segments_tld); @@ -755,7 +761,8 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - page->heap = NULL; + mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);