diff --git a/.gitattributes b/.gitattributes
index acdbdbf4..1534e778 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2,6 +2,7 @@
 * text eol=lf
 *.png binary
 *.pdn binary
+*.jpg binary
 *.sln binary
 *.suo binary
 *.vcproj binary
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 707cf9b5..bbc293a8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,6 +19,8 @@ option(MI_BUILD_SHARED "Build shared library" ON)
 option(MI_BUILD_STATIC "Build static library" ON)
 option(MI_BUILD_OBJECT "Build object library" ON)
 option(MI_BUILD_TESTS "Build test executables" ON)
+option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
+option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
 option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
 option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
@@ -28,6 +30,7 @@ set(mi_sources
     src/stats.c
     src/random.c
     src/os.c
+    src/bitmap.c
     src/arena.c
     src/region.c
     src/segment.c
@@ -44,7 +47,7 @@ set(mi_sources
 # -----------------------------------------------------------------------------
 if (NOT CMAKE_BUILD_TYPE)
-  if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL MATCHES "ON")
+  if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL)
     message(STATUS "No build type selected, default to: Debug")
     set(CMAKE_BUILD_TYPE "Debug")
   else()
@@ -66,20 +69,20 @@ if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
   set(MI_USE_CXX "ON")
 endif()

-if(MI_OVERRIDE MATCHES "ON")
+if(MI_OVERRIDE)
   message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
   if(APPLE)
-    if(MI_OSX_ZONE MATCHES "ON")
+    if(MI_OSX_ZONE)
       # use zone's on macOS
       message(STATUS "  Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
       list(APPEND mi_sources src/alloc-override-osx.c)
       list(APPEND mi_defines MI_OSX_ZONE=1)
-      if(NOT MI_INTERPOSE MATCHES "ON")
+      if(NOT MI_INTERPOSE)
         message(STATUS "  (enabling INTERPOSE as well since zone's require this)")
         set(MI_INTERPOSE "ON")
       endif()
     endif()
-    if(MI_INTERPOSE MATCHES "ON")
+    if(MI_INTERPOSE)
       # use interpose on macOS
       message(STATUS "  Use interpose to override malloc (MI_INTERPOSE=ON)")
       list(APPEND mi_defines MI_INTERPOSE)
@@ -87,42 +90,71 @@ if(MI_OVERRIDE MATCHES "ON")
   endif()
 endif()

-if(MI_SECURE MATCHES "ON")
+if(MI_SECURE)
   message(STATUS "Set full secure build (MI_SECURE=ON)")
   list(APPEND mi_defines MI_SECURE=4)
 endif()

-if(MI_SEE_ASM MATCHES "ON")
+if(MI_SEE_ASM)
   message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)")
   list(APPEND mi_cflags -save-temps)
 endif()

-if(MI_CHECK_FULL MATCHES "ON")
+if(MI_CHECK_FULL)
   message(STATUS "The MI_CHECK_FULL option is deprecated, use MI_DEBUG_FULL instead")
   set(MI_DEBUG_FULL "ON")
 endif()

-if(MI_DEBUG_FULL MATCHES "ON")
+if(MI_DEBUG_FULL)
   message(STATUS "Set debug level to full internal invariant checking (MI_DEBUG_FULL=ON)")
   list(APPEND mi_defines MI_DEBUG=3)   # full invariant checking
 endif()

-if(MI_PADDING MATCHES "OFF")
+if(NOT MI_PADDING)
   message(STATUS "Disable padding of heap blocks in debug mode (MI_PADDING=OFF)")
   list(APPEND mi_defines MI_PADDING=0)
 endif()

-if(MI_XMALLOC MATCHES "ON")
+if(MI_XMALLOC)
   message(STATUS "Enable abort() calls on memory allocation failure (MI_XMALLOC=ON)")
   list(APPEND mi_defines MI_XMALLOC=1)
 endif()

-if(MI_SHOW_ERRORS MATCHES "ON")
+if(MI_SHOW_ERRORS)
   message(STATUS "Enable printing of error and warning messages by default (MI_SHOW_ERRORS=ON)")
   list(APPEND mi_defines MI_SHOW_ERRORS=1)
 endif()
-if(MI_USE_CXX MATCHES "ON")
+if(MI_DEBUG_TSAN)
+  if(CMAKE_C_COMPILER_ID MATCHES "Clang")
+    message(STATUS "Build with thread sanitizer (MI_DEBUG_TSAN=ON)")
+    list(APPEND mi_defines MI_TSAN=1)
+    list(APPEND mi_cflags -fsanitize=thread -g -O1)
+    list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread)
+  else()
+    message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)")
+  endif()
+endif()
+
+if(MI_DEBUG_UBSAN)
+  if(CMAKE_BUILD_TYPE MATCHES "Debug")
+    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+      message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)")
+      list(APPEND mi_cflags -fsanitize=undefined -g)
+      list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=undefined)
+      if (NOT MI_USE_CXX)
+        message(STATUS "(switch to use C++ due to MI_DEBUG_UBSAN)")
+        set(MI_USE_CXX "ON")
+      endif()
+    else()
+      message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)")
+    endif()
+  else()
+    message(WARNING "Can only use undefined-behavior sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
+  endif()
+endif()
+
+if(MI_USE_CXX)
   message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)")
   set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX )
   set_source_files_properties(src/static.c test/test-api.c test/test-stress PROPERTIES LANGUAGE CXX )
@@ -146,22 +178,29 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
   list(APPEND mi_cflags -Wall -fvisibility=hidden)
 endif()

-if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel")
-  if(MI_LOCAL_DYNAMIC_TLS MATCHES "ON")
+if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
+  if(MI_LOCAL_DYNAMIC_TLS)
     list(APPEND mi_cflags -ftls-model=local-dynamic)
   else()
     list(APPEND mi_cflags -ftls-model=initial-exec)
   endif()
 endif()

+if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
+  list(APPEND mi_cflags /Zc:__cplusplus)
+endif()
+
 # Architecture flags
-if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "arm")
-  list(APPEND mi_cflags -march=native)
+if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "arm" AND NOT APPLE)
+  check_cxx_compiler_flag(-march=native CXX_SUPPORTS_MARCH_NATIVE)
+  if (CXX_SUPPORTS_MARCH_NATIVE)
+    list(APPEND mi_cflags -march=native)
+  endif()
 endif()

 # extra needed libraries
 if(WIN32)
-  list(APPEND mi_libraries psapi shell32 user32 bcrypt)
+  list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt)
 else()
   if(NOT ${CMAKE_C_COMPILER} MATCHES "android")
     list(APPEND mi_libraries pthread)
@@ -176,16 +215,18 @@ endif()
 # Install and output names
 # -----------------------------------------------------------------------------

-if (MI_INSTALL_TOPLEVEL MATCHES "ON")
+if (MI_INSTALL_TOPLEVEL)
   set(mi_install_dir "${CMAKE_INSTALL_PREFIX}")
 else()
   set(mi_install_dir "${CMAKE_INSTALL_PREFIX}/lib/mimalloc-${mi_version}")
 endif()

-if(MI_SECURE MATCHES "ON")
+
+if(MI_SECURE)
   set(mi_basename "mimalloc-secure")
 else()
   set(mi_basename "mimalloc")
 endif()
+
 string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC)
 if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel)$"))
   set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version
@@ -202,9 +243,15 @@ endif()
 if(MI_BUILD_TESTS)
   list(APPEND mi_build_targets "tests")
 endif()
+
 message(STATUS "")
 message(STATUS "Library base name: ${mi_basename}")
 message(STATUS "Build type       : ${CMAKE_BUILD_TYPE_LC}")
+if(MI_USE_CXX)
+  message(STATUS "Compiler         : ${CMAKE_CXX_COMPILER}")
+else()
+  message(STATUS "Compiler         : ${CMAKE_C_COMPILER}")
+endif()
 message(STATUS "Install directory: ${mi_install_dir}")
 message(STATUS "Build targets    : ${mi_build_targets}")
 message(STATUS "")
@@ -306,7 +353,7 @@ endif()
 # -----------------------------------------------------------------------------
 # API surface testing
 # -----------------------------------------------------------------------------
-if (MI_BUILD_TESTS MATCHES "ON")
+if (MI_BUILD_TESTS)
   add_executable(mimalloc-test-api test/test-api.c)
   target_compile_definitions(mimalloc-test-api PRIVATE ${mi_defines})
   target_compile_options(mimalloc-test-api PRIVATE ${mi_cflags})
@@ -327,7 +374,7 @@ endif()
 # -----------------------------------------------------------------------------
 # Set override properties
 # -----------------------------------------------------------------------------
-if (MI_OVERRIDE MATCHES "ON")
+if (MI_OVERRIDE)
   if (MI_BUILD_SHARED)
     target_compile_definitions(mimalloc PRIVATE MI_MALLOC_OVERRIDE)
   endif()
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 954ec15d..85e89420 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -7,12 +7,12 @@ trigger:
   - master
   - dev

-jobs:
+jobs:
 - job:
   displayName: Windows
   pool:
     vmImage:
-      windows-2019
+      windows-2019
   strategy:
     matrix:
       Debug:
@@ -36,18 +36,20 @@ jobs:
     inputs:
       solution: $(BuildType)/libmimalloc.sln
       configuration: '$(MSBuildConfiguration)'
-  - script: |
-      cd $(BuildType)
-      ctest
+      msbuildArguments: -m
+  - script: ctest --verbose --timeout 120
+    workingDirectory: $(BuildType)
     displayName: CTest
-  - upload: $(Build.SourcesDirectory)/$(BuildType)
-    artifact: mimalloc-windows-$(BuildType)
+  #- script: $(BuildType)\$(BuildType)\mimalloc-test-stress
+  #  displayName: TestStress
+  #- upload: $(Build.SourcesDirectory)/$(BuildType)
+  #  artifact: mimalloc-windows-$(BuildType)

 - job:
   displayName: Linux
   pool:
     vmImage:
-      ubuntu-16.04
+      ubuntu-18.04
   strategy:
     matrix:
       Debug:
@@ -97,10 +99,11 @@ jobs:
       cmakeArgs: .. $(cmakeExtraArgs)
   - script: make -j$(nproc) -C $(BuildType)
     displayName: Make
-  - script: make test -C $(BuildType)
+  - script: ctest --verbose --timeout 120
+    workingDirectory: $(BuildType)
     displayName: CTest
-  - upload: $(Build.SourcesDirectory)/$(BuildType)
-    artifact: mimalloc-ubuntu-$(BuildType)
+#  - upload: $(Build.SourcesDirectory)/$(BuildType)
+#    artifact: mimalloc-ubuntu-$(BuildType)

 - job:
   displayName: macOS
@@ -125,7 +128,41 @@ jobs:
       cmakeArgs: .. $(cmakeExtraArgs)
   - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
     displayName: Make
-  - script: make test -C $(BuildType)
+  - script: ctest --verbose --timeout 120
+    workingDirectory: $(BuildType)
     displayName: CTest
-  - upload: $(Build.SourcesDirectory)/$(BuildType)
-    artifact: mimalloc-macos-$(BuildType)
+#  - upload: $(Build.SourcesDirectory)/$(BuildType)
+#    artifact: mimalloc-macos-$(BuildType)
+
+# - job:
+#   displayName: Windows-2017
+#   pool:
+#     vmImage:
+#       vs2017-win2016
+#   strategy:
+#     matrix:
+#       Debug:
+#         BuildType: debug
+#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+#         MSBuildConfiguration: Debug
+#       Release:
+#         BuildType: release
+#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release
+#         MSBuildConfiguration: Release
+#       Secure:
+#         BuildType: secure
+#         cmakeExtraArgs: -A x64 -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
+#         MSBuildConfiguration: Release
+#   steps:
+#   - task: CMake@1
+#     inputs:
+#       workingDirectory: $(BuildType)
+#       cmakeArgs: .. $(cmakeExtraArgs)
+#   - task: MSBuild@1
+#     inputs:
+#       solution: $(BuildType)/libmimalloc.sln
+#       configuration: '$(MSBuildConfiguration)'
+#   - script: |
+#       cd $(BuildType)
+#       ctest --verbose --timeout 120
+#     displayName: CTest
diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake
index 6454d91f..edffeea1 100644
--- a/cmake/mimalloc-config-version.cmake
+++ b/cmake/mimalloc-config-version.cmake
@@ -3,14 +3,16 @@ set(mi_version_minor 6)
 set(mi_version ${mi_version_major}.${mi_version_minor})
 set(PACKAGE_VERSION ${mi_version})

-if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${mi_version_major}")
-  if ("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${mi_version_minor}")
-    set(PACKAGE_VERSION_EXACT TRUE)
-  elseif("${PACKAGE_FIND_VERSION_MINOR}" LESS "${mi_version_minor}")
-    set(PACKAGE_VERSION_COMPATIBLE TRUE)
+if(PACKAGE_FIND_VERSION_MAJOR)
+  if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${mi_version_major}")
+    if ("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${mi_version_minor}")
+      set(PACKAGE_VERSION_EXACT TRUE)
+    elseif("${PACKAGE_FIND_VERSION_MINOR}" LESS "${mi_version_minor}")
+      set(PACKAGE_VERSION_COMPATIBLE TRUE)
+    else()
+      set(PACKAGE_VERSION_UNSUITABLE TRUE)
+    endif()
   else()
     set(PACKAGE_VERSION_UNSUITABLE TRUE)
   endif()
-else()
-  set(PACKAGE_VERSION_UNSUITABLE TRUE)
 endif()
diff --git a/doc/bench-c5-18xlarge-2020-01-20-a.svg b/doc/bench-c5-18xlarge-2020-01-20-a.svg
index 0e550935..90050974 100644
--- a/doc/bench-c5-18xlarge-2020-01-20-a.svg
+++ b/doc/bench-c5-18xlarge-2020-01-20-a.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-c5-18xlarge-2020-01-20-b.svg b/doc/bench-c5-18xlarge-2020-01-20-b.svg
index 22bfa5c2..2d853edc 100644
--- a/doc/bench-c5-18xlarge-2020-01-20-b.svg
+++ b/doc/bench-c5-18xlarge-2020-01-20-b.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg b/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg
index 6b15ebe5..393bfad9 100644
--- a/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg
+++ b/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg b/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg
index e3eb774c..419dc250 100644
--- a/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg
+++ b/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-r5a-1.svg b/doc/bench-r5a-1.svg
index 127d6de8..c296a048 100644
--- a/doc/bench-r5a-1.svg
+++ b/doc/bench-r5a-1.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-r5a-12xlarge-2020-01-16-a.svg b/doc/bench-r5a-12xlarge-2020-01-16-a.svg
index b110ff47..b8a2f20e 100644
--- a/doc/bench-r5a-12xlarge-2020-01-16-a.svg
+++ b/doc/bench-r5a-12xlarge-2020-01-16-a.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-r5a-12xlarge-2020-01-16-b.svg b/doc/bench-r5a-12xlarge-2020-01-16-b.svg
index f7a3287e..4a7e21e7 100644
--- a/doc/bench-r5a-12xlarge-2020-01-16-b.svg
+++ b/doc/bench-r5a-12xlarge-2020-01-16-b.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-r5a-2.svg b/doc/bench-r5a-2.svg
index 8b7b2da4..917ea573 100644
--- a/doc/bench-r5a-2.svg
+++ b/doc/bench-r5a-2.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-r5a-rss-1.svg b/doc/bench-r5a-rss-1.svg
index 1c7f8566..375ebd20 100644
--- a/doc/bench-r5a-rss-1.svg
+++ b/doc/bench-r5a-rss-1.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/bench-r5a-rss-2.svg b/doc/bench-r5a-rss-2.svg
index e819884d..cb2bbc89 100644
--- a/doc/bench-r5a-rss-2.svg
+++ b/doc/bench-r5a-rss-2.svg
@@ -1,6 +1,7 @@
+
diff --git a/doc/ds-logo.jpg b/doc/ds-logo.jpg
new file mode 100644
index 00000000..853a7279
Binary files /dev/null and b/doc/ds-logo.jpg differ
diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h
index 67f4fe95..7c238d29 100644
--- a/doc/mimalloc-doc.h
+++ b/doc/mimalloc-doc.h
@@ -26,17 +26,25 @@ without code changes, for example, on Unix you can use it as:

 Notable aspects of the design include:

-- __small and consistent__: the library is less than 6k LOC using simple and
+- __small and consistent__: the library is about 8k LOC using simple and
   consistent data structures. This makes it very suitable
   to integrate and adapt in other projects. For runtime systems it
   provides hooks for a monotonic _heartbeat_ and deferred freeing (for
   bounded worst-case times with reference counting).
-- __free list sharding__: the big idea: instead of one big free list (per size class) we have
-  many smaller lists per memory "page" which both reduces fragmentation
-  and increases locality --
+- __free list sharding__: instead of one big free list (per size class) we have
+  many smaller lists per "mimalloc page" which reduces fragmentation and
+  increases locality --
   things that are allocated close in time get allocated close in memory.
-  (A memory "page" in _mimalloc_ contains blocks of one size class and is
-  usually 64KiB on a 64-bit system).
+  (A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system).
+- __free list multi-sharding__: the big idea! Not only do we shard the free list
+  per mimalloc page, but for each page we have multiple free lists. In particular, there
+  is one list for thread-local `free` operations, and another one for concurrent `free`
+  operations. Free-ing from another thread can now be a single CAS without needing
+  sophisticated coordination between threads. Since there will be
+  thousands of separate free lists, contention is naturally distributed over the heap,
+  and the chance of contending on a single location will be low -- this is quite
+  similar to randomized algorithms like skip lists where adding
+  a random oracle removes the need for a more complex algorithm.
 - __eager page reset__: when a "page" becomes empty (with increased chance
   due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged")
   reducing (real) memory pressure and fragmentation, especially in long running
@@ -51,7 +59,7 @@ Notable aspects of the design include:
   times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in
   allocation sizes), and has no internal points of contention using only atomic operations.
 - __fast__: In our benchmarks (see [below](#performance)),
-  _mimalloc_ always outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
+  _mimalloc_ outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
   and usually uses less memory (up to 25% more in the worst case). A nice property is that it
   does consistently well over a wide range of benchmarks.
@@ -298,7 +306,7 @@ size_t mi_good_size(size_t size);
 /// resource usage by calling this every once in a while.
 void mi_collect(bool force);

-/// Print the main statistics.
+/// Deprecated
 /// @param out Ignored, outputs to the registered output function or stderr by default.
 ///
 /// Most detailed when using a debug build.
@@ -309,7 +317,7 @@ void mi_stats_print(void* out);
 /// @param arg Optional argument passed to \a out (if not \a NULL)
 ///
 /// Most detailed when using a debug build.
-void mi_stats_print(mi_output_fun* out, void* arg);
+void mi_stats_print_out(mi_output_fun* out, void* arg);

 /// Reset statistics.
 void mi_stats_reset(void);
@@ -441,6 +449,20 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
 /// Currently only used on Windows.
 bool mi_is_redirected();

+/// Return process information (time and memory usage).
+/// @param elapsed_msecs   Optional. Elapsed wall-clock time of the process in milli-seconds.
+/// @param user_msecs      Optional. User time in milli-seconds (as the sum over all threads).
+/// @param system_msecs    Optional. System time in milli-seconds.
+/// @param current_rss     Optional. Current working set size (touched pages).
+/// @param peak_rss        Optional. Peak working set size (touched pages).
+/// @param current_commit  Optional. Current committed memory (backed by the page file).
+/// @param peak_commit     Optional. Peak committed memory (backed by the page file).
+/// @param page_faults     Optional. Count of hard page faults.
+///
+/// The \a current_rss is precise on Windows and MacOSX; other systems estimate
+/// this using \a current_commit. The \a commit is precise on Windows but estimated
+/// on other systems as the amount of read/write accessible memory reserved by mimalloc.
+void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults);

 /// \}
@@ -752,8 +774,8 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block
 /// Runtime options.
 typedef enum mi_option_e {
   // stable options
-  mi_option_show_stats,   ///< Print statistics to `stderr` when the program is done.
   mi_option_show_errors,  ///< Print error messages to `stderr`.
+  mi_option_show_stats,   ///< Print statistics to `stderr` when the program is done.
   mi_option_verbose,      ///< Print verbose messages to `stderr`.
   // the following options are experimental
   mi_option_eager_commit, ///< Eagerly commit segments (4MiB) (enabled by default).
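Before the final hunk of this file (below), a minimal usage sketch of the statistics and process-information changes documented in the hunks above. It assumes only `<mimalloc.h>` from this version of the library; passing the address of every field is shown here, and the "Optional" parameter notes suggest `NULL` may be passed for fields you do not need.

```c
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  void* p = mi_malloc(1024);
  mi_free(p);

  // Query process time and memory usage as documented in the hunk above.
  size_t elapsed = 0, user = 0, sys = 0, rss = 0, peak_rss = 0;
  size_t commit = 0, peak_commit = 0, faults = 0;
  mi_process_info(&elapsed, &user, &sys, &rss, &peak_rss,
                  &commit, &peak_commit, &faults);
  printf("elapsed: %zu ms, peak rss: %zu, hard page faults: %zu\n",
         elapsed, peak_rss, faults);

  // mi_stats_print(void* out) is now deprecated; the two-argument overload
  // was renamed to mi_stats_print_out (a NULL output function means the
  // registered output function or stderr).
  mi_stats_print_out(NULL, NULL);
  return 0;
}
```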
@@ -772,9 +794,11 @@ typedef enum mi_option_e {
 } mi_option_t;

-bool mi_option_enabled(mi_option_t option);
-void mi_option_enable(mi_option_t option, bool enable);
-void mi_option_enable_default(mi_option_t option, bool enable);
+bool mi_option_is_enabled(mi_option_t option);
+void mi_option_enable(mi_option_t option);
+void mi_option_disable(mi_option_t option);
+void mi_option_set_enabled(mi_option_t option, bool enable);
+void mi_option_set_enabled_default(mi_option_t option, bool enable);

 long mi_option_get(mi_option_t option);
 void mi_option_set(mi_option_t option, long value);
diff --git a/docs/group__extended.html b/docs/group__extended.html
index 325d62bf..12e51cbb 100644
[regenerated Doxygen page: marks mi_stats_print as deprecated, renames the
 two-argument overload to mi_stats_print_out, and adds the mi_process_info
 documentation shown in doc/mimalloc-doc.h above]
diff --git a/docs/group__extended.js b/docs/group__extended.js
index ff8891b2..ed4a8b46 100644
[regenerated Doxygen index: adds mi_process_info and replaces the second
 mi_stats_print entry with mi_stats_print_out]
diff --git a/docs/group__options.html b/docs/group__options.html
index 5e45d7bd..9425765e 100644
[regenerated Doxygen page: swaps the order of mi_option_show_errors and
 mi_option_show_stats, and replaces mi_option_enabled, mi_option_enable(option, bool),
 and mi_option_enable_default with mi_option_is_enabled, mi_option_enable,
 mi_option_disable, mi_option_set_enabled, and mi_option_set_enabled_default]
diff --git a/docs/group__options.js b/docs/group__options.js
index 1d84ea8b..9aaf2318 100644
[regenerated Doxygen index: same option-API renames as above]
diff --git a/docs/index.html b/docs/index.html
index ce9c983d..01af9bec 100644
[regenerated Doxygen front page: updates the design-overview list (including the
 new free list multi-sharding bullet) to match the revised introduction in
 doc/mimalloc-doc.h above]
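The option-API rework in the doc/mimalloc-doc.h hunk above splits the old boolean-taking mi_option_enable into separate enable/disable calls plus explicit set_enabled variants. A small sketch of the new surface, assuming `<mimalloc.h>` from this version:

```c
#include <stdbool.h>
#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  // Set a default that applies only if the option was not already
  // configured (e.g. through the environment).
  mi_option_set_enabled_default(mi_option_show_errors, true);

  mi_option_enable(mi_option_show_stats);   // was: mi_option_enable(option, true)
  mi_option_disable(mi_option_verbose);     // was: mi_option_enable(option, false)
  mi_option_set_enabled(mi_option_eager_commit, true);

  if (mi_option_is_enabled(mi_option_show_stats)) {  // was: mi_option_enabled(option)
    printf("statistics will be printed when the program is done\n");
  }
  return 0;
}
```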
diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html index 09c03b9b..6d2e86ff 100644 --- a/docs/mimalloc-doc_8h_source.html +++ b/docs/mimalloc-doc_8h_source.html @@ -102,79 +102,82 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
mimalloc-doc.h
-
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
83 
87 
91 void mi_free(void* p);
92 
97 void* mi_malloc(size_t size);
98 
103 void* mi_zalloc(size_t size);
104 
114 void* mi_calloc(size_t count, size_t size);
115 
128 void* mi_realloc(void* p, size_t newsize);
129 
140 void* mi_recalloc(void* p, size_t count, size_t size);
141 
155 void* mi_expand(void* p, size_t newsize);
156 
166 void* mi_mallocn(size_t count, size_t size);
167 
177 void* mi_reallocn(void* p, size_t count, size_t size);
178 
195 void* mi_reallocf(void* p, size_t newsize);
196 
197 
206 char* mi_strdup(const char* s);
207 
217 char* mi_strndup(const char* s, size_t n);
218 
231 char* mi_realpath(const char* fname, char* resolved_name);
232 
234 
235 // ------------------------------------------------------
236 // Extended functionality
237 // ------------------------------------------------------
238 
242 
245 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
246 
254 void* mi_malloc_small(size_t size);
255 
263 void* mi_zalloc_small(size_t size);
264 
279 size_t mi_usable_size(void* p);
280 
290 size_t mi_good_size(size_t size);
291 
299 void mi_collect(bool force);
300 
305 void mi_stats_print(void* out);
306 
312 void mi_stats_print(mi_output_fun* out, void* arg);
313 
315 void mi_stats_reset(void);
316 
318 void mi_stats_merge(void);
319 
323 void mi_thread_init(void);
324 
329 void mi_thread_done(void);
330 
336 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
337 
344 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
345 
361 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
362 
368 typedef void (mi_output_fun)(const char* msg, void* arg);
369 
376 void mi_register_output(mi_output_fun* out, void* arg);
377 
383 typedef void (mi_error_fun)(int err, void* arg);
384 
400 void mi_register_error(mi_error_fun* errfun, void* arg);
401 
406 bool mi_is_in_heap_region(const void* p);
407 
408 
421 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
422 
435 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
436 
437 
442 bool mi_is_redirected();
443 
444 
446 
447 // ------------------------------------------------------
448 // Aligned allocation
449 // ------------------------------------------------------
450 
456 
469 void* mi_malloc_aligned(size_t size, size_t alignment);
470 void* mi_zalloc_aligned(size_t size, size_t alignment);
471 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
472 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
473 
484 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
485 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
486 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
487 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
488 
490 
496 
501 struct mi_heap_s;
502 
507 typedef struct mi_heap_s mi_heap_t;
508 
511 
519 void mi_heap_delete(mi_heap_t* heap);
520 
528 void mi_heap_destroy(mi_heap_t* heap);
529 
534 
538 
545 
547 void mi_heap_collect(mi_heap_t* heap, bool force);
548 
551 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
552 
556 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
557 
560 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
561 
564 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
565 
568 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
569 
572 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
573 
576 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
577 
580 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
581 
582 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
583 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
584 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
585 
586 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
589 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
591 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
592 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
593 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
594 
596 
597 
606 
607 void* mi_rezalloc(void* p, size_t newsize);
608 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
609 
610 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
611 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
612 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
613 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
614 
615 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
616 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
617 
618 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
619 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
620 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
621 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
622 
624 
633 
645 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
646 
648 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
649 
651 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
652 
654 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
655 
657 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
658 
660 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
661 
663 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
664 
666 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
667 
669 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
670 
672 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
673 
675 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
676 
678 
684 
691 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
692 
701 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
702 
710 bool mi_check_owned(const void* p);
711 
714 typedef struct mi_heap_area_s {
715  void* blocks;
716  size_t reserved;
717  size_t committed;
718  size_t used;
719  size_t block_size;
721 
729 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
730 
742 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
743 
745 
751 
753 typedef enum mi_option_e {
754  // stable options
758  // the following options are experimental
772 } mi_option_t;
773 
774 
775 bool mi_option_enabled(mi_option_t option);
776 void mi_option_enable(mi_option_t option, bool enable);
777 void mi_option_enable_default(mi_option_t option, bool enable);
778 
779 long mi_option_get(mi_option_t option);
780 void mi_option_set(mi_option_t option, long value);
781 void mi_option_set_default(mi_option_t option, long value);
782 
783 
785 
792 
793 void* mi_recalloc(void* p, size_t count, size_t size);
794 size_t mi_malloc_size(const void* p);
795 size_t mi_malloc_usable_size(const void *p);
796 
798 void mi_cfree(void* p);
799 
800 int mi_posix_memalign(void** p, size_t alignment, size_t size);
801 int mi__posix_memalign(void** p, size_t alignment, size_t size);
802 void* mi_memalign(size_t alignment, size_t size);
803 void* mi_valloc(size_t size);
804 
805 void* mi_pvalloc(size_t size);
806 void* mi_aligned_alloc(size_t alignment, size_t size);
807 void* mi_reallocarray(void* p, size_t count, size_t size);
808 
809 void mi_free_size(void* p, size_t size);
810 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
811 void mi_free_aligned(void* p, size_t alignment);
812 
814 
827 
829 void* mi_new(std::size_t n) noexcept(false);
830 
832 void* mi_new_n(size_t count, size_t size) noexcept(false);
833 
835 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
836 
838 void* mi_new_nothrow(size_t n);
839 
841 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
842 
844 void* mi_new_realloc(void* p, size_t newsize);
845 
847 void* mi_new_reallocn(void* p, size_t newcount, size_t size);
848 
856 template<class T> struct mi_stl_allocator { }
857 
859 
void mi_option_enable_default(mi_option_t option, bool enable)
-
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
+
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
91 
95 
99 void mi_free(void* p);
100 
105 void* mi_malloc(size_t size);
106 
111 void* mi_zalloc(size_t size);
112 
122 void* mi_calloc(size_t count, size_t size);
123 
136 void* mi_realloc(void* p, size_t newsize);
137 
148 void* mi_recalloc(void* p, size_t count, size_t size);
149 
163 void* mi_expand(void* p, size_t newsize);
164 
174 void* mi_mallocn(size_t count, size_t size);
175 
185 void* mi_reallocn(void* p, size_t count, size_t size);
186 
203 void* mi_reallocf(void* p, size_t newsize);
204 
205 
214 char* mi_strdup(const char* s);
215 
225 char* mi_strndup(const char* s, size_t n);
226 
239 char* mi_realpath(const char* fname, char* resolved_name);
240 
242 
243 // ------------------------------------------------------
244 // Extended functionality
245 // ------------------------------------------------------
246 
250 
253 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
254 
262 void* mi_malloc_small(size_t size);
263 
271 void* mi_zalloc_small(size_t size);
272 
287 size_t mi_usable_size(void* p);
288 
298 size_t mi_good_size(size_t size);
299 
307 void mi_collect(bool force);
308 
313 void mi_stats_print(void* out);
314 
320 void mi_stats_print_out(mi_output_fun* out, void* arg);
321 
323 void mi_stats_reset(void);
324 
326 void mi_stats_merge(void);
327 
331 void mi_thread_init(void);
332 
337 void mi_thread_done(void);
338 
344 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
345 
352 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
353 
369 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
370 
376 typedef void (mi_output_fun)(const char* msg, void* arg);
377 
384 void mi_register_output(mi_output_fun* out, void* arg);
385 
391 typedef void (mi_error_fun)(int err, void* arg);
392 
408 void mi_register_error(mi_error_fun* errfun, void* arg);
409 
414 bool mi_is_in_heap_region(const void* p);
415 
416 
429 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
430 
443 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
444 
445 
450 bool mi_is_redirected();
451 
465 void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults);
466 
468 
469 // ------------------------------------------------------
470 // Aligned allocation
471 // ------------------------------------------------------
472 
478 
491 void* mi_malloc_aligned(size_t size, size_t alignment);
492 void* mi_zalloc_aligned(size_t size, size_t alignment);
493 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
494 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
495 
506 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
507 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
508 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
509 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
510 
512 
518 
523 struct mi_heap_s;
524 
529 typedef struct mi_heap_s mi_heap_t;
530 
533 
541 void mi_heap_delete(mi_heap_t* heap);
542 
550 void mi_heap_destroy(mi_heap_t* heap);
551 
556 
560 
567 
569 void mi_heap_collect(mi_heap_t* heap, bool force);
570 
573 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
574 
578 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
579 
582 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
583 
586 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
587 
590 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
591 
594 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
595 
598 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
599 
602 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
603 
604 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
605 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
606 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
607 
608 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
609 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
610 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
611 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
612 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
613 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
614 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
615 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
616 
618 
619 
628 
629 void* mi_rezalloc(void* p, size_t newsize);
630 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
631 
632 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
633 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
634 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
635 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
636 
637 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
638 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
639 
640 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
641 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
642 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
643 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
644 
646 
655 
667 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
668 
670 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
671 
673 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
674 
676 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
677 
679 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
680 
682 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
683 
685 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
686 
688 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
689 
691 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
692 
694 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
695 
697 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
698 
700 
706 
713 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
714 
723 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
724 
732 bool mi_check_owned(const void* p);
733 
736 typedef struct mi_heap_area_s {
737  void* blocks;
738  size_t reserved;
739  size_t committed;
740  size_t used;
741  size_t block_size;
743 
751 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
752 
764 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
765 
767 
773 
775 typedef enum mi_option_e {
776  // stable options
780  // the following options are experimental
794 } mi_option_t;
795 
796 
797 bool mi_option_is_enabled(mi_option_t option);
798 void mi_option_enable(mi_option_t option);
799 void mi_option_disable(mi_option_t option);
800 void mi_option_set_enabled(mi_option_t option, bool enable);
801 void mi_option_set_enabled_default(mi_option_t option, bool enable);
802 
803 long mi_option_get(mi_option_t option);
804 void mi_option_set(mi_option_t option, long value);
805 void mi_option_set_default(mi_option_t option, long value);
806 
807 
809 
816 
817 void* mi_recalloc(void* p, size_t count, size_t size);
818 size_t mi_malloc_size(const void* p);
819 size_t mi_malloc_usable_size(const void *p);
820 
822 void mi_cfree(void* p);
823 
824 int mi_posix_memalign(void** p, size_t alignment, size_t size);
825 int mi__posix_memalign(void** p, size_t alignment, size_t size);
826 void* mi_memalign(size_t alignment, size_t size);
827 void* mi_valloc(size_t size);
828 
829 void* mi_pvalloc(size_t size);
830 void* mi_aligned_alloc(size_t alignment, size_t size);
831 void* mi_reallocarray(void* p, size_t count, size_t size);
832 
833 void mi_free_size(void* p, size_t size);
834 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
835 void mi_free_aligned(void* p, size_t alignment);
836 
838 
851 
853 void* mi_new(std::size_t n) noexcept(false);
854 
856 void* mi_new_n(size_t count, size_t size) noexcept(false);
857 
859 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
860 
862 void* mi_new_nothrow(size_t n);
863 
865 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
866 
868 void* mi_new_realloc(void* p, size_t newsize);
869 
871 void* mi_new_reallocn(void* p, size_t newcount, size_t size);
872 
880 template<class T> struct mi_stl_allocator { }
881 
883 
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
void * mi_new_nothrow(size_t n)
like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
void * mi_recalloc_aligned_at(void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
void mi_stats_reset(void)
Reset statistics.
void * mi_heap_realloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
bool mi_option_is_enabled(mi_option_t option)
void * mi_new_realloc(void *p, size_t newsize)
like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void * mi_recalloc(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes, with extra memory initialized to zero.
void * mi_mallocn(size_t count, size_t size)
Allocate count elements of size bytes.
size_t mi_malloc_size(const void *p)
+
void mi_option_set_enabled(mi_option_t option, bool enable)
int mi_posix_memalign(void **p, size_t alignment, size_t size)
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
void * mi_new_n(size_t count, size_t size) noexcept(false)
like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void mi_option_set_default(mi_option_t option, long value)
-
void() mi_error_fun(int err, void *arg)
Type of error callback functions.
Definition: mimalloc-doc.h:383
+
void mi_stats_print_out(mi_output_fun *out, void *arg)
Print the main statistics.
+
void() mi_error_fun(int err, void *arg)
Type of error callback functions.
Definition: mimalloc-doc.h:391
void * mi_rezalloc(void *p, size_t newsize)
-
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:759
+
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:781
void * mi_heap_zalloc(mi_heap_t *heap, size_t size)
Allocate zero-initialized in a specific heap.
void mi_option_set(mi_option_t option, long value)
-
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:760
+
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:782
void mi_cfree(void *p)
Just as free but also checks if the pointer p belongs to our heap.
void * mi_recalloc_aligned(void *p, size_t newcount, size_t size, size_t alignment)
-
Definition: mimalloc-doc.h:771
+
Definition: mimalloc-doc.h:793
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:715
+
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:737
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
+
void mi_option_enable(mi_option_t option)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
char * mi_heap_strdup(mi_heap_t *heap, const char *s)
Duplicate a string in a specific heap.
char * mi_heap_realpath(mi_heap_t *heap, const char *fname, char *resolved_name)
Resolve a file path name using a specific heap to allocate the result.
void * mi_heap_calloc_aligned_at(mi_heap_t *heap, size_t count, size_t size, size_t alignment, size_t offset)
+
void mi_process_info(size_t *elapsed_msecs, size_t *user_msecs, size_t *system_msecs, size_t *current_rss, size_t *peak_rss, size_t *current_commit, size_t *peak_commit, size_t *page_faults)
Return process information (time and memory usage).
void * mi_calloc_aligned(size_t count, size_t size, size_t alignment)
void * mi_heap_zalloc_aligned(mi_heap_t *heap, size_t size, size_t alignment)
void * mi_zalloc_small(size_t size)
Allocate a zero initialized small object.
char * mi_strndup(const char *s, size_t n)
Allocate and duplicate a string up to n bytes.
void * mi_expand(void *p, size_t newsize)
Try to re-allocate memory to newsize bytes in place.
void * mi_pvalloc(size_t size)
+
void mi_option_set_enabled_default(mi_option_t option, bool enable)
void * mi_heap_rezalloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_zalloc(size_t size)
Allocate zero-initialized size bytes.
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
-
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:763
+
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:785
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
-
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:719
+
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:741
void * mi_reallocarray(void *p, size_t count, size_t size)
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most t...
-
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:344
+
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:352
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
-
void mi_option_enable(mi_option_t option, bool enable)
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc e...
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
-
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:762
+
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:784
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
void mi_free_size_aligned(void *p, size_t size, size_t alignment)
void * mi_rezalloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:764
+
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:786
void mi_thread_done(void)
Uninitialize mimalloc on a thread.
bool mi_heap_visit_blocks(const mi_heap_t *heap, bool visit_all_blocks, mi_block_visit_fun *visitor, void *arg)
Visit all areas and blocks in a heap.
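A sketch of a visitor matching the mi_block_visit_fun shape documented further below; that visit_all_blocks=true invokes the callback once per live block is an assumption here, and count_blocks / count_default_heap are illustrative names:

#include <stdbool.h>
#include <stddef.h>
#include <mimalloc.h>

static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
                         void* block, size_t block_size, void* arg) {
  (void)heap; (void)area;
  size_t* stats = (size_t*)arg;            /* stats[0] = blocks, stats[1] = bytes */
  if (block != NULL) { stats[0]++; stats[1] += block_size; }
  return true;                             /* returning false would stop the visit */
}

static void count_default_heap(size_t stats[2]) {
  stats[0] = 0; stats[1] = 0;
  mi_heap_visit_blocks(mi_heap_get_default(), true, &count_blocks, stats);
}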
-
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:767
+
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:789
void * mi_malloc(size_t size)
Allocate size bytes.
-
bool mi_option_enabled(mi_option_t option)
void mi_register_error(mi_error_fun *errfun, void *arg)
Register an error callback function.
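A hedged sketch: the mi_error_fun typedef is not shown in this hunk, so the (int err, void* arg) callback shape used below is an assumption, and log_mi_error / install_error_handler are illustrative names:

#include <stdio.h>
#include <mimalloc.h>

static void log_mi_error(int err, void* arg) {  /* assumed callback shape */
  (void)arg;
  fprintf(stderr, "mimalloc error: %d\n", err);
}

static void install_error_handler(void) {
  mi_register_error(&log_mi_error, NULL);
}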
-
Experimental.
Definition: mimalloc-doc.h:768
+
Experimental.
Definition: mimalloc-doc.h:790
char * mi_heap_strndup(mi_heap_t *heap, const char *s, size_t n)
Duplicate a string of at most length n in a specific heap.
-
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:729
+
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:751
void * mi_heap_recalloc(mi_heap_t *heap, void *p, size_t newcount, size_t size)
void * mi_heap_malloc_aligned_at(mi_heap_t *heap, size_t size, size_t alignment, size_t offset)
char * mi_realpath(const char *fname, char *resolved_name)
Resolve a file path name.
-
Print error messages to stderr.
Definition: mimalloc-doc.h:756
-
Experimental.
Definition: mimalloc-doc.h:765
+
Print error messages to stderr.
Definition: mimalloc-doc.h:777
+
Experimental.
Definition: mimalloc-doc.h:787
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and return NULL on failure.
void * mi_memalign(size_t alignment, size_t size)
@@ -182,42 +185,43 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
void * mi_heap_recalloc_aligned_at(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
Print verbose messages to stderr.
Definition: mimalloc-doc.h:757
+
Print verbose messages to stderr.
Definition: mimalloc-doc.h:779
void * mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset)
void * mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset)
Allocate size bytes aligned by alignment at a specified offset.
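A small sketch of the aligned-at variant; the contract assumed here, that the data starting at offset is what ends up aligned, follows the usual reading of these entries but is not spelled out in this hunk (aligned_at_demo is an illustrative name):

#include <mimalloc.h>

static void aligned_at_demo(void) {
  /* Assumption: the returned p satisfies ((uintptr_t)p + 16) % 64 == 0. */
  void* p = mi_malloc_aligned_at(200, 64, 16);
  mi_free(p);
}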
void mi_heap_delete(mi_heap_t *heap)
Delete a previously allocated heap.
-
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:770
+
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:792
mi_heap_t * mi_heap_get_default()
Get the default heap that is used for mi_malloc() et al.
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs ...
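A usage sketch; the 0-on-success, errno-style return convention assumed below is not stated in this hunk, and reserve_huge_pages is an illustrative name:

#include <stdio.h>
#include <mimalloc.h>

static void reserve_huge_pages(void) {
  /* Try to reserve 4 one-GiB huge pages on NUMA node 0, waiting up to 2 seconds. */
  int err = mi_reserve_huge_os_pages_at(4 /*pages*/, 0 /*numa node*/, 2000 /*ms*/);
  if (err != 0) fprintf(stderr, "huge page reservation failed: %d\n", err);
}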
+
void mi_option_disable(mi_option_t option)
void * mi_aligned_alloc(size_t alignment, size_t size)
void * mi_valloc(size_t size)
void mi_thread_init(void)
Initialize mimalloc on a thread.
size_t mi_good_size(size_t size)
Return the used allocation size.
-
void mi_stats_print(void *out)
Print the main statistics.
-
Experimental.
Definition: mimalloc-doc.h:769
+
void mi_stats_print(void *out)
Deprecated.
+
Experimental.
Definition: mimalloc-doc.h:791
void * mi_heap_recalloc_aligned(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
void * mi_heap_mallocn(mi_heap_t *heap, size_t count, size_t size)
Allocate count elements in a specific heap.
-
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:714
+
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:736
void mi_thread_stats_print_out(mi_output_fun *out, void *arg)
Print out heap statistics for this thread.
-
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:755
+
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:778
void * mi_zalloc_aligned(size_t size, size_t alignment)
-
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:716
-
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:507
-
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:718
+
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:738
+
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:529
+
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:740
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
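Using the mi_deferred_free_fun shape documented above, a sketch of a periodic cache-trimming hook; reading heartbeat as a monotonic call counter and force as a request to release everything now are assumptions, and cache_trim_hook / install_deferred_free are illustrative names:

#include <stdbool.h>
#include <mimalloc.h>

static void cache_trim_hook(bool force, unsigned long long heartbeat, void* arg) {
  (void)arg;
  if (force || (heartbeat % 64) == 0) {
    /* release program-level caches here; `force` asks for an eager flush */
  }
}

static void install_deferred_free(void) {
  mi_register_deferred_free(&cache_trim_hook, NULL);
}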
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
void * mi_new_reallocn(void *p, size_t newcount, size_t size)
like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc excepti...
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
-
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:761
+
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:783
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
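Using the mi_output_fun shape documented further below, a sketch that redirects mimalloc's messages to a caller-provided log file (log_output / install_logging are illustrative names):

#include <stdio.h>
#include <mimalloc.h>

static void log_output(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);                 /* append mimalloc messages to the log */
}

static void install_logging(FILE* logfile) {
  mi_register_output(&log_output, logfile);
}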
-
std::allocator implementation for mimalloc for use in STL containers.
Definition: mimalloc-doc.h:856
+
std::allocator implementation for mimalloc for use in STL containers.
Definition: mimalloc-doc.h:880
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
void * mi_heap_realloc(mi_heap_t *heap, void *p, size_t newsize)
size_t mi_malloc_usable_size(const void *p)
-
void() mi_output_fun(const char *msg, void *arg)
Type of output functions.
Definition: mimalloc-doc.h:368
+
void() mi_output_fun(const char *msg, void *arg)
Type of output functions.
Definition: mimalloc-doc.h:376
char * mi_strdup(const char *s)
Allocate and duplicate a string.
void * mi_heap_realloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_reallocf(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
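The trailing f conventionally means free-on-failure; treating that as the semantics here is an assumption rather than something this hunk states (grow_or_drop is an illustrative name):

#include <mimalloc.h>

static char* grow_or_drop(char* buf) {
  buf = (char*)mi_reallocf(buf, 4096);    /* old block assumed freed on failure */
  return buf;                             /* NULL means the old data is gone too */
}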
@@ -230,11 +234,11 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
void * mi_new(std::size_t n) noexcept(false)
like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception...
-
Delay in milliseconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:766
+
Delay in milliseconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:788
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
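Pulling several of the heap entries in this file together, a small first-class-heap sketch: allocate from a private heap, then release everything at once with mi_heap_destroy, which is documented above to free all still-allocated blocks (heap_demo is an illustrative name):

#include <mimalloc.h>

static void heap_demo(void) {
  mi_heap_t* heap = mi_heap_new();
  char* s = mi_heap_strdup(heap, "temporary");
  void* a = mi_heap_zalloc_aligned(heap, 1024, 64);
  (void)s; (void)a;
  mi_heap_destroy(heap);                  /* frees s and a in one call */
}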
-
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:717
-
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:753
+
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:739
+
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:775
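A configuration sketch built from the option entries in this file; that the *_default setter only applies when the option was not already set explicitly is an assumption, and configure_options is an illustrative name:

#include <stdbool.h>
#include <mimalloc.h>

static void configure_options(void) {
  mi_option_set_enabled_default(mi_option_show_errors, true);  /* default only (assumed) */
  mi_option_disable(mi_option_verbose);                        /* explicit override */
}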
bool mi_heap_check_owned(mi_heap_t *heap, const void *p)
Check safely if any pointer is part of a heap.
mi_heap_t * mi_heap_set_default(mi_heap_t *heap)
Set the default heap to use for mi_malloc() et al.
diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js index 047d6dbc..4a1e93fa 100644 --- a/docs/navtreeindex0.js +++ b/docs/navtreeindex0.js @@ -38,29 +38,30 @@ var NAVTREEINDEX0 = "group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,9,2], "group__cpp.html#structmi__stl__allocator":[5,9,0], "group__extended.html":[5,1], -"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,21], -"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,18], +"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,22], +"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,19], "group__extended.html#ga1ea64283508718d9d645c38efc2f4305":[5,1,0], -"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,22], +"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,23], "group__extended.html#ga251d369cda3f1c2a955c555486ed90e5":[5,1,2], -"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,16], "group__extended.html#ga299dae78d25ce112e384a98b7309c5be":[5,1,1], -"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,15], -"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,13], -"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,9], -"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,17], +"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,16], +"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,14], +"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,10], +"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,18], "group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,4], +"group__extended.html#ga537f13b299ddf801e49a5a94fde02c79":[5,1,17], "group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,6], "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,8], -"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,12], -"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,14], -"group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45":[5,1,10], +"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,13], +"group__extended.html#ga7d862c2affd5790381da14eb102a364d":[5,1,9], +"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,15], +"group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45":[5,1,11], "group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,7], -"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,20], +"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,21], "group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,5], "group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,3], -"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,11], -"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,19], +"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,12], +"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,20], "group__heap.html":[5,3], "group__heap.html#ga00e95ba1e01acac3cfd95bb7a357a6f0":[5,3,20], "group__heap.html#ga08ca6419a5c057a4d965868998eef487":[5,3,3], @@ -104,14 +105,16 @@ var NAVTREEINDEX0 = "group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000":[5,0,12], "group__malloc.html#gafe68ac7c5e24a65cd55c9d6b152211a0":[5,0,6], "group__options.html":[5,7], -"group__options.html#ga37988264b915a7db92530cc02d5494cb":[5,7,2], -"group__options.html#ga6d45a20a3131f18bc351b69763b38ce4":[5,7,1], -"group__options.html#ga7e8af195cc81d3fa64ccf2662caa565a":[5,7,4], 
+"group__options.html#ga04180ae41b0d601421dd62ced40ca050":[5,7,2], +"group__options.html#ga459ad98f18b3fc9275474807fe0ca188":[5,7,4], +"group__options.html#ga65518b69ec5d32336b50e07f74b3f629":[5,7,8], +"group__options.html#ga7e8af195cc81d3fa64ccf2662caa565a":[5,7,3], "group__options.html#ga7ef623e440e6e5545cb08c94e71e4b90":[5,7,6], -"group__options.html#gacebe3f6d91b4a50b54eb84e2a1da1b30":[5,7,3], +"group__options.html#ga9a13d05fcb77489cb06d4d017ebd8bed":[5,7,7], +"group__options.html#gaebf6ff707a2e688ebb1a2296ca564054":[5,7,1], "group__options.html#gaf84921c32375e25754dc2ee6a911fa60":[5,7,5], "group__options.html#gafebf7ed116adb38ae5218bc3ce06884c":[5,7,0], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda":[5,7,0,0], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda":[5,7,0,1], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74":[5,7,0,11], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5":[5,7,0,10], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,13], @@ -126,7 +129,7 @@ var NAVTREEINDEX0 = "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2":[5,7,0,6], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968":[5,7,0,8], "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,9], -"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0":[5,7,0,1], +"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0":[5,7,0,0], "group__posix.html":[5,8], "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17":[5,8,7], "group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9":[5,8,3], diff --git a/docs/search/all_6.js b/docs/search/all_6.js index c757cbbf..491883f4 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -80,13 +80,13 @@ var searchData= ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], + ['mi_5foption_5fdisable',['mi_option_disable',['../group__options.html#gaebf6ff707a2e688ebb1a2296ca564054',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], - ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], - ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], - ['mi_5foption_5fenabled',['mi_option_enabled',['../group__options.html#gacebe3f6d91b4a50b54eb84e2a1da1b30',1,'mimalloc-doc.h']]], + 
['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga04180ae41b0d601421dd62ced40ca050',1,'mimalloc-doc.h']]], ['mi_5foption_5fget',['mi_option_get',['../group__options.html#ga7e8af195cc81d3fa64ccf2662caa565a',1,'mimalloc-doc.h']]], + ['mi_5foption_5fis_5fenabled',['mi_option_is_enabled',['../group__options.html#ga459ad98f18b3fc9275474807fe0ca188',1,'mimalloc-doc.h']]], ['mi_5foption_5flarge_5fos_5fpages',['mi_option_large_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4192d491200d0055df0554d4cf65054e',1,'mimalloc-doc.h']]], ['mi_5foption_5fos_5ftag',['mi_option_os_tag',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf',1,'mimalloc-doc.h']]], ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], @@ -97,6 +97,8 @@ var searchData= ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fset',['mi_option_set',['../group__options.html#gaf84921c32375e25754dc2ee6a911fa60',1,'mimalloc-doc.h']]], ['mi_5foption_5fset_5fdefault',['mi_option_set_default',['../group__options.html#ga7ef623e440e6e5545cb08c94e71e4b90',1,'mimalloc-doc.h']]], + ['mi_5foption_5fset_5fenabled',['mi_option_set_enabled',['../group__options.html#ga9a13d05fcb77489cb06d4d017ebd8bed',1,'mimalloc-doc.h']]], + ['mi_5foption_5fset_5fenabled_5fdefault',['mi_option_set_enabled_default',['../group__options.html#ga65518b69ec5d32336b50e07f74b3f629',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], ['mi_5foption_5ft',['mi_option_t',['../group__options.html#gafebf7ed116adb38ae5218bc3ce06884c',1,'mimalloc-doc.h']]], @@ -104,6 +106,7 @@ var searchData= ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]], ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]], ['mi_5fposix_5fmemalign',['mi_posix_memalign',['../group__posix.html#gacff84f226ba9feb2031b8992e5579447',1,'mimalloc-doc.h']]], + ['mi_5fprocess_5finfo',['mi_process_info',['../group__extended.html#ga7d862c2affd5790381da14eb102a364d',1,'mimalloc-doc.h']]], ['mi_5fpvalloc',['mi_pvalloc',['../group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e',1,'mimalloc-doc.h']]], ['mi_5frealloc',['mi_realloc',['../group__malloc.html#gaf11eb497da57bdfb2de65eb191c69db6',1,'mimalloc-doc.h']]], ['mi_5frealloc_5faligned',['mi_realloc_aligned',['../group__aligned.html#ga4028d1cf4aa4c87c880747044a8322ae',1,'mimalloc-doc.h']]], @@ -126,7 +129,8 @@ var searchData= ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fsmall_5fsize_5fmax',['MI_SMALL_SIZE_MAX',['../group__extended.html#ga1ea64283508718d9d645c38efc2f4305',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - 
['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint_5fout',['mi_stats_print_out',['../group__extended.html#ga537f13b299ddf801e49a5a94fde02c79',1,'mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index 6271797a..b188b270 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -66,13 +66,16 @@ var searchData= ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], - ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], - ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], - ['mi_5foption_5fenabled',['mi_option_enabled',['../group__options.html#gacebe3f6d91b4a50b54eb84e2a1da1b30',1,'mimalloc-doc.h']]], + ['mi_5foption_5fdisable',['mi_option_disable',['../group__options.html#gaebf6ff707a2e688ebb1a2296ca564054',1,'mimalloc-doc.h']]], + ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga04180ae41b0d601421dd62ced40ca050',1,'mimalloc-doc.h']]], ['mi_5foption_5fget',['mi_option_get',['../group__options.html#ga7e8af195cc81d3fa64ccf2662caa565a',1,'mimalloc-doc.h']]], + ['mi_5foption_5fis_5fenabled',['mi_option_is_enabled',['../group__options.html#ga459ad98f18b3fc9275474807fe0ca188',1,'mimalloc-doc.h']]], ['mi_5foption_5fset',['mi_option_set',['../group__options.html#gaf84921c32375e25754dc2ee6a911fa60',1,'mimalloc-doc.h']]], ['mi_5foption_5fset_5fdefault',['mi_option_set_default',['../group__options.html#ga7ef623e440e6e5545cb08c94e71e4b90',1,'mimalloc-doc.h']]], + ['mi_5foption_5fset_5fenabled',['mi_option_set_enabled',['../group__options.html#ga9a13d05fcb77489cb06d4d017ebd8bed',1,'mimalloc-doc.h']]], + ['mi_5foption_5fset_5fenabled_5fdefault',['mi_option_set_enabled_default',['../group__options.html#ga65518b69ec5d32336b50e07f74b3f629',1,'mimalloc-doc.h']]], ['mi_5fposix_5fmemalign',['mi_posix_memalign',['../group__posix.html#gacff84f226ba9feb2031b8992e5579447',1,'mimalloc-doc.h']]], + ['mi_5fprocess_5finfo',['mi_process_info',['../group__extended.html#ga7d862c2affd5790381da14eb102a364d',1,'mimalloc-doc.h']]], ['mi_5fpvalloc',['mi_pvalloc',['../group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e',1,'mimalloc-doc.h']]], ['mi_5frealloc',['mi_realloc',['../group__malloc.html#gaf11eb497da57bdfb2de65eb191c69db6',1,'mimalloc-doc.h']]], 
['mi_5frealloc_5faligned',['mi_realloc_aligned',['../group__aligned.html#ga4028d1cf4aa4c87c880747044a8322ae',1,'mimalloc-doc.h']]], @@ -93,7 +96,8 @@ var searchData= ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint_5fout',['mi_stats_print_out',['../group__extended.html#ga537f13b299ddf801e49a5a94fde02c79',1,'mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 990d6ca9..a1266dc9 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -215,6 +215,7 @@ + @@ -232,6 +233,7 @@ + @@ -251,4 +253,4 @@ - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 02652658..e045ed8c 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -29,6 +29,9 @@ Header Files + + Header Files + @@ -76,5 +79,8 @@ Source Files + + Source Files + - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 770a87b2..8102b9fe 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -110,7 +110,7 @@ true ../../include _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsCpp + CompileAsC false stdcpp17 @@ -129,7 +129,7 @@ true ../../include _CRT_SECURE_NO_WARNINGS;MI_DEBUG=3;%(PreprocessorDefinitions); - CompileAsCpp + CompileAsC false stdcpp17 @@ -161,7 +161,7 @@ false false Default - CompileAsCpp + CompileAsC true @@ -188,7 +188,7 @@ false false Default - CompileAsCpp + CompileAsC true @@ -230,6 +230,7 @@ + @@ -253,8 +254,9 @@ + - + \ No newline at end of file diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index 43660519..500292c5 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -59,6 +59,9 @@ Source Files + + Source Files + @@ -79,5 +82,8 @@ Header Files + + Header Files + - + \ No newline at end of file diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index a0e79fb0..182ddab1 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -215,6 +215,7 @@ + @@ -232,9 +233,7 @@ - - true - + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index 8e36f50e..c06fd1de 
100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -40,15 +40,15 @@ Source Files - - Source Files - Source Files Source Files + + Source Files + @@ -69,6 +69,9 @@ Header Files + + Source Files + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index e18db0c5..6c7e276c 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -220,8 +220,8 @@ - - true + + false @@ -246,6 +246,7 @@ + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index 4704fb2e..4cd0eb2e 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -46,10 +46,10 @@ Source Files - + Source Files - + Source Files @@ -72,6 +72,9 @@ Header Files + + Source Files + @@ -81,4 +84,4 @@ {852a14ae-6dde-4e95-8077-ca705e97e5af} - + \ No newline at end of file diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 8577dbc5..2d725a25 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018, Microsoft Research, Daan Leijen +Copyright (c) 2018,2020 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,120 +8,117 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_ATOMIC_H #define MIMALLOC_ATOMIC_H -// ------------------------------------------------------ +// -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// ------------------------------------------------------ +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// instead of passing the memory order as a parameter. +// ----------------------------------------------------------------------------------------------- -#if defined(_MSC_VER) -#define _Atomic(tp) tp -#define ATOMIC_VAR_INIT(x) x -#elif defined(__cplusplus) +#if defined(__cplusplus) +// Use C++ atomics #include -#define _Atomic(tp) std::atomic +#define _Atomic(tp) std::atomic +#define mi_atomic(name) std::atomic_##name +#define mi_memory_order(name) std::memory_order_##name +#elif defined(_MSC_VER) +// Use MSVC C wrapper for C11 atomics +#define _Atomic(tp) tp +#define ATOMIC_VAR_INIT(x) x +#define mi_atomic(name) mi_atomic_##name +#define mi_memory_order(name) mi_memory_order_##name #else +// Use C11 atomics #include +#define mi_atomic(name) atomic_##name +#define mi_memory_order(name) memory_order_##name #endif -// ------------------------------------------------------ -// Atomic operations specialized for mimalloc -// ------------------------------------------------------ +// Various defines for all used memory orders in mimalloc +#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail) -// Atomically add a 64-bit value; returns the previous value. -// Note: not using _Atomic(int64_t) as it is only used for statistics. 
-static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add); +#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \ + mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail) -// Atomically add a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add); +#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) +#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed)) +#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire)) -// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); +#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel)) +#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel)) -// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); +#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1) +#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1) +#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1) +#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1) -// Atomically compare and exchange a value; returns `true` if successful. -// May fail spuriously. Memory ordering as release on success, and relaxed on failure. -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); - -// Atomically compare and exchange a value; returns `true` if successful. -// Memory ordering is acquire-release -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); - -// Atomically exchange a value. Memory ordering is acquire-release. 
-static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange); - -// Atomically read a value. Memory ordering is relaxed. -static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p); - -// Atomically read a value. Memory ordering is acquire. -static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p); - -// Atomically write a value. Memory ordering is release. -static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); - -// Yield static inline void mi_atomic_yield(void); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add); +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); -// Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { - return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); +#if defined(__cplusplus) || !defined(_MSC_VER) + +// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value) +// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well +#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p) +#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p) + +// In C++ we need to add casts to help resolve templates if NULL is passed +#if defined(__cplusplus) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) +#else +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) +#endif + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) { + return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed)); +} +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ }; } -// Atomically increment a value; returns the incremented result. -static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_add(p, 1); -} - -// Atomically decrement a value; returns the decremented result.
-static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_sub(p, 1); -} - -// Atomically add a signed value; returns the previous value. -static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) { - return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add); -} - -// Atomically subtract a signed value; returns the previous value. -static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) { - return (intptr_t)mi_atomic_addi(p,-sub); -} - -// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). -#define mi_atomic_read_ptr_relaxed(T,p) \ - (T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p))) - -// Atomically read a pointer; Memory order is acquire. -#define mi_atomic_read_ptr(T,p) \ - (T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p))) - -// Atomically write a pointer; Memory order is acquire. -#define mi_atomic_write_ptr(T,p,x) \ - mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) - -// Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. -// Memory order is release. (like a write) -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \ - mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) - -// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release. -// (Note: expected and desired are in opposite order from atomic_compare_exchange) -#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \ - mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) - -// Atomically exchange a pointer value. -#define mi_atomic_exchange_ptr(T,p,exchange) \ - (T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange)) +// Used by timers +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) -#ifdef _MSC_VER + +#elif defined(_MSC_VER) + +// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. 
#define WIN32_LEAN_AND_MEAN -#include +#include #include #ifdef _WIN64 typedef LONG64 msc_intptr_t; @@ -130,128 +127,206 @@ typedef LONG64 msc_intptr_t; typedef LONG msc_intptr_t; #define MI_64(f) f #endif -static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { + +typedef enum mi_memory_order_e { + mi_memory_order_relaxed, + mi_memory_order_consume, + mi_memory_order_acquire, + mi_memory_order_release, + mi_memory_order_acq_rel, + mi_memory_order_seq_cst +} mi_memory_order; + +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) { + (void)(mo); return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } -static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) { + (void)(mo); + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); +} +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected)); +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + (void)(mo1); (void)(mo2); + uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); + if (read == *expected) { + return true; + } + else { + *expected = read; + return false; + } } -static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - return mi_atomic_cas_strong(p,desired,expected); +static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { + return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); } -static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) { + (void)(mo); return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } -static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) { - return *p; +static inline void mi_atomic_thread_fence(mi_memory_order mo) { + (void)(mo); + _Atomic(uintptr_t)x = 0; + mi_atomic_exchange_explicit(&x, 1, mo); } -static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) { +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) return *p; +#else + uintptr_t x = *p; + if (mo > 
mi_memory_order_relaxed) { + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + } + return x; +#endif } -static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) { - #if defined(_M_IX86) || defined(_M_X64) +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) *p = x; - #else - mi_atomic_exchange(p,x); - #endif +#else + mi_atomic_exchange_explicit(p, x, mo); +#endif } -static inline void mi_atomic_yield(void) { - YieldProcessor(); +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_X64) + return *p; +#else + int64_t old = *p; + int64_t x = old; + while ((old = InterlockedCompareExchange64(p, x, old)) != x) { + x = old; + } + return x; +#endif } -static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) { - #ifdef _WIN64 - mi_atomic_addi(p,add); - #else +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) { + (void)(mo); +#if defined(_M_IX86) || defined(_M_X64) + *p = x; +#else + InterlockedExchange64(p, x); +#endif +} + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { +#ifdef _WIN64 + return (int64_t)mi_atomic_addi((int64_t*)p, add); +#else int64_t current; int64_t sum; do { current = *p; sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); - #endif -} - -#else -#ifdef __cplusplus -#define MI_USING_STD using namespace std; -#else -#define MI_USING_STD + return current; #endif -static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) { - MI_USING_STD - atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed); } -static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) { - MI_USING_STD - return atomic_fetch_add_explicit(p, add, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_fetch_and_explicit(p, x, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) { - MI_USING_STD - return atomic_fetch_or_explicit(p, x, memory_order_relaxed); -} -static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - MI_USING_STD - return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed); -} -static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) { - MI_USING_STD - return atomic_compare_exchange_strong_explicit(p, &expected, desired, memory_order_acq_rel, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) { - MI_USING_STD - return atomic_exchange_explicit(p, exchange, memory_order_acq_rel); -} -static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p) { - MI_USING_STD - return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed); -} -static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) { - MI_USING_STD - return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire); -} -static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t
x) { - MI_USING_STD - return atomic_store_explicit(p, x, memory_order_release); +static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { + int64_t current; + do { + current = *p; + } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } +// The pointer macros cast to `uintptr_t`. +#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) + +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) + + +#endif + + +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); +} + +// Atomically subtract a signed value; returns the previous value. 
+static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p, -sub); +} + +// Yield #if defined(__cplusplus) - #include - static inline void mi_atomic_yield(void) { - std::this_thread::yield(); - } +#include +static inline void mi_atomic_yield(void) { + std::this_thread::yield(); +} +#elif defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#include +static inline void mi_atomic_yield(void) { + YieldProcessor(); +} +#elif defined(__SSE2__) +#include +static inline void mi_atomic_yield(void) { + _mm_pause(); +} #elif (defined(__GNUC__) || defined(__clang__)) && \ - (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) #if defined(__x86_64__) || defined(__i386__) - static inline void mi_atomic_yield(void) { - asm volatile ("pause" ::: "memory"); - } -#elif defined(__arm__) || defined(__aarch64__) - static inline void mi_atomic_yield(void) { - asm volatile("yield"); - } +static inline void mi_atomic_yield(void) { + __asm__ volatile ("pause" ::: "memory"); +} +#elif defined(__aarch64__) +static inline void mi_atomic_yield(void) { + asm volatile("wfe"); +} +#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +static inline void mi_atomic_yield(void) { + __asm__ volatile("yield" ::: "memory"); +} +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +static inline void mi_atomic_yield(void) { + __asm__ __volatile__ ("or 27,27,27" ::: "memory"); +} +#elif defined(__armel__) || defined(__ARMEL__) +static inline void mi_atomic_yield(void) { + asm volatile ("nop" ::: "memory"); +} #endif +#elif defined(__sun) +// Fallback for other archs +#include +static inline void mi_atomic_yield(void) { + smt_pause(); +} #elif defined(__wasi__) - #include - static inline void mi_atomic_yield(void) { - sched_yield(); - } +#include +static inline void mi_atomic_yield(void) { + sched_yield(); +} #else - #include - static inline void mi_atomic_yield(void) { - sleep(0); - } +#include +static inline void mi_atomic_yield(void) { + sleep(0); +} #endif -#endif #endif // __MIMALLOC_ATOMIC_H diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d0c0b3f3..6a239f1a 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -32,7 +32,6 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_cache_align #endif - // "options.c" void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); @@ -64,7 +63,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free th size_t _mi_os_good_alloc_size(size_t size); // memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld); +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); @@ -107,7 +106,6 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback fro size_t _mi_bin_size(uint8_t bin); // for stats uint8_t _mi_bin(size_t size); // for stats -uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD in "os.c" // "heap.c" void _mi_heap_destroy_pages(mi_heap_t* heap); @@ -238,23 +236,28 @@ static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size) } // Overflow detecting multiply -static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 -#include // UINT_MAX, ULONG_MAX -#if (SIZE_MAX == UINT_MAX) - return __builtin_umul_overflow(count, size, total); -#elif (SIZE_MAX == ULONG_MAX) - return __builtin_umull_overflow(count, size, total); -#else - return __builtin_umulll_overflow(count, size, total); +#include // UINT_MAX, ULONG_MAX +#if defined(_CLOCK_T) // for Illumos +#undef _CLOCK_T #endif +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { + #if (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, total); + #elif (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, total); + #else + return __builtin_umulll_overflow(count, size, total); + #endif +} #else /* __builtin_umul_overflow is unavailable */ +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) - && size > 0 && (SIZE_MAX / size) < count); -#endif + && size > 0 && (SIZE_MAX / size) < count); } +#endif // Safe multiply `count*size` into `total`; return `true` on overflow. static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) { @@ -263,7 +266,7 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot return false; } else if (mi_unlikely(mi_mul_overflow(count, size, total))) { - _mi_error_message(EOVERFLOW, "allocation request too large (%zu * %zu bytes)\n", count, size); + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); *total = SIZE_MAX; return true; } @@ -282,7 +285,7 @@ We try to circumvent this in an efficient way: - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the loader itself calls `malloc` even before the modules are initialized. - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). -- DragonFly: not yet working. +- DragonFly: the uniqueid use is buggy but kept for reference. 
------------------------------------------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap @@ -300,7 +303,7 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) #elif defined(__DragonFly__) #warning "mimalloc is not working correctly on DragonFly yet." -#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) +//#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) #endif #endif @@ -312,7 +315,7 @@ static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { pthread_t self = pthread_self(); #if defined(__DragonFly__) if (self==NULL) { - static mi_heap_t* pheap_main = _mi_heap_main_get(); + mi_heap_t* pheap_main = _mi_heap_main_get(); return &pheap_main; } #endif @@ -443,21 +446,21 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { - return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); + return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); } static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { - return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); + return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); } // Heap access static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { - return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); + return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); } static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); - mi_atomic_write(&page->xheap,(uintptr_t)heap); + mi_atomic_store_release(&page->xheap,(uintptr_t)heap); } // Thread free flag helpers @@ -569,11 +572,11 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) { shift %= MI_INTPTR_BITS; - return ((x << shift) | (x >> (MI_INTPTR_BITS - shift))); + return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift)))); } static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { shift %= MI_INTPTR_BITS; - return ((x >> shift) | (x << (MI_INTPTR_BITS - shift))); + return (shift==0 ? 
x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift)))); } static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { @@ -694,15 +697,21 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // 32-bit always uses GS #elif defined(__MACH__) && defined(__x86_64__) __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS +#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI #elif defined(__x86_64__) __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS #elif defined(__arm__) void** tcb; UNUSED(ofs); - asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); res = tcb[slot]; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); - asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); +#if defined(__APPLE__) // issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); +#else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); +#endif res = tcb[slot]; #endif return res; @@ -715,15 +724,21 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS #elif defined(__MACH__) && defined(__x86_64__) __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOSX uses GS +#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI #elif defined(__x86_64__) __asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS #elif defined(__arm__) void** tcb; UNUSED(ofs); - asm volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); tcb[slot] = value; #elif defined(__aarch64__) void** tcb; UNUSED(ofs); - asm volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); +#if defined(__APPLE__) // issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); +#else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); +#endif tcb[slot] = value; #endif } @@ -739,5 +754,108 @@ static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { } #endif +// ----------------------------------------------------------------------- +// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) +// ----------------------------------------------------------------------- + +#if defined(__GNUC__) + +#include // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_clzl(x); +#else + return __builtin_clzll(x); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_ctzl(x); +#else + return __builtin_ctzll(x); +#endif +} + +#elif defined(_MSC_VER) + +#include // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanReverse(&idx, x); +#else + _BitScanReverse64(&idx, x); +#endif + return ((MI_INTPTR_BITS - 1) - idx); +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned 
long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanForward(&idx, x); +#else + _BitScanForward64(&idx, x); +#endif + return idx; +} + +#else +static inline size_t mi_ctz32(uint32_t x) { + // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> + static const unsigned char debruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + if (x==0) return 32; + return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; +} +static inline size_t mi_clz32(uint32_t x) { + // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> + static const uint8_t debruijn[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 + }; + if (x==0) return 32; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; +} + +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_clz32((uint32_t)x); +#else + size_t count = mi_clz32((uint32_t)(x >> 32)); + if (count < 32) return count; + return (32 + mi_clz32((uint32_t)x)); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_ctz32((uint32_t)x); +#else + size_t count = mi_ctz32((uint32_t)x); + if (count < 32) return count; + return (32 + mi_ctz32((uint32_t)(x>>32))); +#endif +} + +#endif + +// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); +} + #endif diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h index 201fb8b4..2362bfbc 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -24,7 +24,7 @@ not accidentally mix pointers from different allocators). #define free(p) mi_free(p) #define strdup(s) mi_strdup(s) -#define strndup(s) mi_strndup(s) +#define strndup(s,n) mi_strndup(s,n) #define realpath(f,n) mi_realpath(f,n) // Microsoft extensions @@ -33,7 +33,7 @@ not accidentally mix pointers from different allocators). #define _recalloc(p,n,c) mi_recalloc(p,n,c) #define _strdup(s) mi_strdup(s) -#define _strndup(s) mi_strndup(s) +#define _strndup(s,n) mi_strndup(s,n) #define _wcsdup(s) (wchar_t*)mi_wcsdup((const unsigned short*)(s)) #define _mbsdup(s) mi_mbsdup(s) #define _dupenv_s(b,n,v) mi_dupenv_s(b,n,v) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 449e2e41..99024679 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -12,9 +12,15 @@ terms of the MIT license. A copy of the license can be found in the file #include <stdint.h> // uintptr_t, uint16_t, etc #include <mimalloc-atomic.h> // _Atomic +#ifdef _MSC_VER +#pragma warning(disable:4214) // bitfield is not int +#endif + // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` +#ifndef MI_MAX_ALIGN_SIZE #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#endif // ------------------------------------------------------ // Variants @@ -155,6 +161,7 @@ typedef enum mi_delayed_e { // The `in_full` and `has_aligned` page flags are put in a union to efficiently // test if both are false (`full_aligned == 0`) in the `mi_free` routine.
+#if !MI_TSAN typedef union mi_page_flags_s { uint8_t full_aligned; struct { @@ -162,6 +169,16 @@ typedef union mi_page_flags_s { uint8_t has_aligned : 1; } x; } mi_page_flags_t; +#else +// under thread sanitizer, use a byte for each flag to suppress warning, issue #130 +typedef union mi_page_flags_s { + uint16_t full_aligned; + struct { + uint8_t in_full; + uint8_t has_aligned; + } x; +} mi_page_flags_t; +#endif // Thread free list. // We use the bottom 2 bits of the pointer for mi_delayed_t flags @@ -222,8 +239,8 @@ typedef struct mi_page_s { uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads - volatile _Atomic(uintptr_t) xheap; + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + _Atomic(uintptr_t) xheap; struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` @@ -243,28 +260,29 @@ typedef enum mi_page_kind_e { // contain blocks. typedef struct mi_segment_s { // memory fields - size_t memid; // id for the os-level memory manager - bool mem_is_fixed; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_committed; // `true` if the whole segment is eagerly committed + size_t memid; // id for the os-level memory manager + bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) + bool mem_is_committed; // `true` if the whole segment is eagerly committed // segment fields - struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc` + _Atomic(struct mi_segment_s*) abandoned_next; + struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` struct mi_segment_s* prev; - struct mi_segment_s* abandoned_next; - size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) - size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) - size_t used; // count of pages in use (`used <= capacity`) - size_t capacity; // count of available pages (`#free + used`) - size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE` - size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages. - uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long) + + size_t used; // count of pages in use (`used <= capacity`) + size_t capacity; // count of available pages (`#free + used`) + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` + size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages.
+ uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` // layout like this to optimize access in `mi_free` - size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - volatile _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment - mi_page_kind_t page_kind; // kind of pages: small, large, or huge - mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages + size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). + _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment + mi_page_kind_t page_kind; // kind of pages: small, large, or huge + mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -322,7 +340,7 @@ struct mi_heap_s { mi_tld_t* tld; mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size. mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") - volatile _Atomic(mi_block_t*) thread_delayed_free; + _Atomic(mi_block_t*) thread_delayed_free; uintptr_t thread_id; // thread this heap belongs to uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list @@ -398,6 +416,7 @@ typedef struct mi_stats_s { mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; mi_stat_count_t threads; + mi_stat_count_t normal; mi_stat_count_t huge; mi_stat_count_t giant; mi_stat_count_t malloc; @@ -407,10 +426,11 @@ typedef struct mi_stats_s { mi_stat_counter_t commit_calls; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; + mi_stat_counter_t normal_count; mi_stat_counter_t huge_count; mi_stat_counter_t giant_count; #if MI_STAT>1 - mi_stat_count_t normal[MI_BIN_HUGE+1]; + mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; #endif } mi_stats_t; @@ -429,6 +449,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_stat_counter_increase(stat,amount) (void)0 #endif +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) diff --git a/include/mimalloc.h b/include/mimalloc.h index 0af04a94..03535f0c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018, Microsoft Research, Daan Leijen +Copyright (c) 2018-2020, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 163 // major + 2 digits minor +#define MI_MALLOC_VERSION 167 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes // ------------------------------------------------------ @@ -24,7 +24,7 @@ terms of the MIT license.
A copy of the license can be found in the file #define mi_attr_noexcept #endif -#if (__cplusplus >= 201703) +#if defined(__cplusplus) && (__cplusplus >= 201703) #define mi_decl_nodiscard [[nodiscard]] #elif (__GNUC__ >= 4) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) @@ -153,6 +153,9 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, + size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; // ------------------------------------------------------------------------------------- // Aligned allocation @@ -192,7 +195,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); -mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);; +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; @@ -256,11 +259,15 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // Experimental mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_nodiscard mi_decl_export bool mi_is_redirected() mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; + + // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; @@ -299,6 +306,7 @@ typedef enum mi_option_e { mi_option_reset_decommits, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, + mi_option_reserve_os_memory, mi_option_segment_cache, mi_option_page_reset, mi_option_abandoned_page_reset, @@ -306,8 +314,10 @@ typedef enum mi_option_e { mi_option_eager_commit_delay, mi_option_reset_delay, mi_option_use_numa_nodes, + mi_option_limit_os_alloc, mi_option_os_tag, mi_option_max_errors, + mi_option_max_warnings, _mi_option_last } mi_option_t; diff --git a/readme.md b/readme.md index 7ec20d72..18d50636 100644 --- a/readme.md +++ b/readme.md @@ -11,26 +11,34 @@ mimalloc (pronounced 
"me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. Initially developed by Daan Leijen for the run-time systems of the [Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages. -Latest release:`v1.6.3` (2020-05-05). +Latest release:`v1.6.7` (2020-09-24). It is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` > LD_PRELOAD=/usr/bin/libmimalloc.so myprogram ``` -It also has an easy way to override the allocator in [Windows](#override_on_windows). Notable aspects of the design include: +It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: -- __small and consistent__: the library is about 6k LOC using simple and +- __small and consistent__: the library is about 8k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). -- __free list sharding__: the big idea: instead of one big free list (per size class) we have - many smaller lists per memory "page" which both reduces fragmentation - and increases locality -- +- __free list sharding__: instead of one big free list (per size class) we have + many smaller lists per "mimalloc page" which reduces fragmentation and + increases locality -- things that are allocated close in time get allocated close in memory. - (A memory "page" in _mimalloc_ contains blocks of one size class and is - usually 64KiB on a 64-bit system). + (A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system). +- __free list multi-sharding__: the big idea! Not only do we shard the free list + per mimalloc page, but for each page we have multiple free lists. In particular, there + is one list for thread-local `free` operations, and another one for concurrent `free` + operations. Free-ing from another thread can now be a single CAS without needing + sophisticated coordination between threads. Since there will be + thousands of separate free lists, contention is naturally distributed over the heap, + and the chance of contending on a single location will be low -- this is quite + similar to randomized algorithms like skip lists where adding + a random oracle removes the need for a more complex algorithm. - __eager page reset__: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") reducing (real) memory pressure and fragmentation, especially in long running @@ -55,8 +63,20 @@ You can read more on the design of _mimalloc_ in the [technical report](https:// Enjoy! +### Branches + +* `master`: latest stable release. +* `dev`: latest development branch. +* `dev-slice`: experimental branch with a different way of managing mimalloc pages that tends + to use less memory than regular mimalloc with similar performance. Give it a try and please + report any significant performance improvement or degradation. 
+ ### Releases +* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved + handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) API call. +* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, + support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support. * 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS, build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics. * 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda, @@ -82,9 +102,26 @@ free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af Special thanks to: -* Jason Gibson (@jasongibson) for exhaustive testing on large workloads and server environments and finding complex bugs in (early versions of) `mimalloc`. +* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making + mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc. +* Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding + memory model bugs using the [genMC] model checker. +* Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment + at large scale services, leading to many improvements in the mimalloc algorithms for large workloads. +* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs + in (early versions of) `mimalloc`. * Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. +[genMC]: https://plv.mpi-sws.org/genmc/ + +### Usage + +mimalloc is used in various large scale low-latency services and programs, for example: + + + + + # Building ## Windows @@ -213,7 +250,7 @@ completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables. +or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. @@ -263,11 +300,11 @@ _mimalloc_ can be built in secure mode by using the `-DMI_SECURE=ON` flags in `c to make mimalloc more robust against exploits. In particular: - All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow - exploit cannot reach into the metadata), + exploit cannot reach into the metadata). - All free list pointers are [encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) - with per-page keys which is used both to prevent overwrites with a known pointer, as well as to detect heap corruption, -- Double free's are detected (and ignored), + with per-page keys which are used both to prevent overwrites with a known pointer, as well as to detect heap corruption. +- Double frees are detected (and ignored).
- The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to mitigate against attacks that rely on a predictable allocation order. Similarly, the larger heap blocks allocated by mimalloc from the OS are also address randomized. @@ -409,7 +446,7 @@ as [mimalloc-bench](https://github.com/daanx/mimalloc-bench). Testing on a big Amazon EC2 compute instance ([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized)) consisting of a 72 processor Intel Xeon at 3GHz -with 144GiB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.4.0. +with 144GiB ECC memory, running Ubuntu 18.04.1 with glibc 2.27 and GCC 7.4.0. The measured allocators are _mimalloc_ (xmi, tag:v1.4.0, page reset enabled) and its secure build as _smi_, Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome, @@ -419,7 +456,7 @@ the Intel thread building blocks [allocator](https://github.com/intel/tbb) (tbb, the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (tag:3.13) allocator by Emery Berger \[1], the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e7) allocator by Bobby Powers _et al_ \[8], -and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_). +and finally the default system allocator (glibc, 2.27) (based on _PtMalloc2_). diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 7eeb9e92..ca16d367 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -17,8 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { // note: we don't require `size > offset`, we just guarantee that // the address at offset is aligned regardless of the allocated size. - mi_assert(alignment > 0 && alignment % sizeof(void*) == 0); - + mi_assert(alignment > 0); if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` @@ -54,7 +53,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // .. and align within the allocation uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); - mi_assert_internal(adjust % sizeof(uintptr_t) == 0); + mi_assert_internal(adjust <= alignment); void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); diff --git a/src/alloc-override-osx.c b/src/alloc-override-osx.c index c1c880ca..4b77f631 100644 --- a/src/alloc-override-osx.c +++ b/src/alloc-override-osx.c @@ -35,7 +35,6 @@ terms of the MIT license.
A copy of the license can be found in the file extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); #endif - /* ------------------------------------------------------ malloc zone members ------------------------------------------------------ */ @@ -44,7 +43,7 @@ static size_t zone_size(malloc_zone_t* zone, const void* p) { UNUSED(zone); if (!mi_is_in_heap_region(p)) return 0; // not our pointer, bail out - + return mi_usable_size(p); } @@ -190,63 +189,85 @@ static malloc_zone_t* mi_get_default_zone() } } -static void __attribute__((constructor)) _mi_macos_override_malloc() -{ - static malloc_introspection_t intro; - memset(&intro, 0, sizeof(intro)); +static malloc_introspection_t mi_introspect = { + .enumerator = &intro_enumerator, + .good_size = &intro_good_size, + .check = &intro_check, + .print = &intro_print, + .log = &intro_log, + .force_lock = &intro_force_lock, + .force_unlock = &intro_force_unlock, +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 + .zone_locked = &intro_zone_locked, + .statistics = &intro_statistics, +#endif +}; - intro.enumerator = &intro_enumerator; - intro.good_size = &intro_good_size; - intro.check = &intro_check; - intro.print = &intro_print; - intro.log = &intro_log; - intro.force_lock = &intro_force_lock; - intro.force_unlock = &intro_force_unlock; +static malloc_zone_t mi_malloc_zone = { + .size = &zone_size, + .zone_name = "mimalloc", + .introspect = &mi_introspect, + .malloc = &zone_malloc, + .calloc = &zone_calloc, + .valloc = &zone_valloc, + .free = &zone_free, + .realloc = &zone_realloc, + .destroy = &zone_destroy, + .batch_malloc = &zone_batch_malloc, + .batch_free = &zone_batch_free, +#if defined(MAC_OS_X_VERSION_10_6) && \ + MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 + // switch to version 9 on OSX 10.6 to support memalign. + .version = 9, + .memalign = &zone_memalign, + .free_definite_size = &zone_free_definite_size, + .pressure_relief = &zone_pressure_relief, +#else + .version = 4, +#endif +}; - static malloc_zone_t zone; - memset(&zone, 0, sizeof(zone)); - zone.version = 4; - zone.zone_name = "mimalloc"; - zone.size = &zone_size; - zone.introspect = &intro; - zone.malloc = &zone_malloc; - zone.calloc = &zone_calloc; - zone.valloc = &zone_valloc; - zone.free = &zone_free; - zone.realloc = &zone_realloc; - zone.destroy = &zone_destroy; - zone.batch_malloc = &zone_batch_malloc; - zone.batch_free = &zone_batch_free; +#if defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE) +static malloc_zone_t *mi_malloc_default_zone(void) { + return &mi_malloc_zone; +} +// TODO: should use the macros in alloc-override but they aren't available here. +__attribute__((used)) static struct { + const void *replacement; + const void *target; +} replace_malloc_default_zone[] __attribute__((section("__DATA, __interpose"))) = { + { (const void*)mi_malloc_default_zone, (const void*)malloc_default_zone }, +}; +#endif + +static void __attribute__((constructor(0))) _mi_macos_override_malloc() { malloc_zone_t* purgeable_zone = NULL; #if defined(MAC_OS_X_VERSION_10_6) && \ MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 - // switch to version 9 on OSX 10.6 to support memalign. 
- zone.version = 9; - zone.memalign = &zone_memalign; - zone.free_definite_size = &zone_free_definite_size; - zone.pressure_relief = &zone_pressure_relief; - intro.zone_locked = &intro_zone_locked; - intro.statistics = &intro_statistics; - // force the purgeable zone to exist to avoid strange bugs if (malloc_default_purgeable_zone) { purgeable_zone = malloc_default_purgeable_zone(); } #endif - // Register our zone - malloc_zone_register(&zone); - + // Register our zone. + // thomcc: I think this is still needed to put us in the zone list. + malloc_zone_register(&mi_malloc_zone); // Unregister the default zone, this makes our zone the new default // as that was the last registered. malloc_zone_t *default_zone = mi_get_default_zone(); - malloc_zone_unregister(default_zone); + // thomcc: Unsure if the next test is *always* false or just false in the + // cases I've tried. I'm also unsure if the code inside is needed at all. + if (default_zone != &mi_malloc_zone) { + malloc_zone_unregister(default_zone); - // Reregister the default zone so free and realloc in that zone keep working. - malloc_zone_register(default_zone); + // Reregister the default zone so free and realloc in that zone keep working. + malloc_zone_register(default_zone); + } // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs // earlier than the default zone. @@ -257,4 +278,4 @@ static void __attribute__((constructor)) _mi_macos_override_malloc() } -#endif // MI_MALLOC_OVERRIDE +#endif // MI_MALLOC_OVERRIDE \ No newline at end of file diff --git a/src/alloc-override.c b/src/alloc-override.c index a09153c5..5906bd20 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -60,6 +60,13 @@ terms of the MIT license. A copy of the license can be found in the file MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), + #ifndef MI_OSX_ZONE + // some code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <http://www.opensource.apple.com/source/objc4/objc4-680/runtime/hashtable2.mm>) + MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us + #else + // We interpose malloc_default_zone in alloc-override-osx.c + MI_INTERPOSE_MI(free), + #endif // some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <http://www.opensource.apple.com/source/objc4/objc4-680/runtime/hashtable2.mm>) MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us }; @@ -183,7 +190,8 @@ void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligne // on some glibc `aligned_alloc` is declared `static inline` so we cannot override it (e.g. Conda). This happens // when _GLIBCXX_HAVE_ALIGNED_ALLOC is not defined. However, in those cases it will use `memalign`, `posix_memalign`, // or `_aligned_malloc` and we can avoid overriding it ourselves. -#if _GLIBCXX_HAVE_ALIGNED_ALLOC +// We should always override if using C compilation. (issue #276) +#if _GLIBCXX_HAVE_ALIGNED_ALLOC || !defined(__cplusplus) void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 4395893b..1ba1509b 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -20,6 +20,10 @@ terms of the MIT license.
A copy of the license can be found in the file #include <string.h> // memcpy #include <stdlib.h> // getenv +#ifdef _MSC_VER +#pragma warning(disable:4996) // getenv _wgetenv +#endif + #ifndef EINVAL #define EINVAL 22 #endif @@ -111,8 +115,7 @@ mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexc int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { if (buf==NULL || name==NULL) return EINVAL; if (size != NULL) *size = 0; - #pragma warning(suppress:4996) - char* p = getenv(name); + char* p = getenv(name); // msvc warning 4996 if (p==NULL) { *buf = NULL; } @@ -132,8 +135,7 @@ int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) *buf = NULL; return EINVAL; #else - #pragma warning(suppress:4996) - unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); + unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); // msvc warning 4996 if (p==NULL) { *buf = NULL; } diff --git a/src/alloc.c b/src/alloc.c index 607d15b6..76d6f8c8 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -23,27 +23,34 @@ terms of the MIT license. A copy of the license can be found in the file // Fall back to generic allocation only if the list is empty. extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); - mi_block_t* block = page->free; + mi_block_t* const block = page->free; if (mi_unlikely(block == NULL)) { return _mi_malloc_generic(heap, size); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list - page->free = mi_block_next(page, block); page->used++; + page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + #if (MI_DEBUG>0) if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } #elif (MI_SECURE!=0) block->next = 0; // don't leak internal data #endif + -#if (MI_STAT>1) +#if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_increase(heap, normal, bsize); + mi_heap_stat_counter_increase(heap, normal_count, 1); +#if (MI_STAT>1) const size_t bin = _mi_bin(bsize); - mi_heap_stat_increase(heap, normal[bin], 1); + mi_heap_stat_increase(heap, normal_bins[bin], 1); +#endif } #endif + #if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); @@ -54,6 +61,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ?
MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } #endif + return block; } @@ -282,6 +290,49 @@ static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, co } #endif +// only maintain stats for smaller objects if requested +#if (MI_STAT>0) +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { +#if (MI_STAT < 2) + UNUSED(block); +#endif + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); +#if (MI_STAT>1) + const size_t usize = mi_page_usable_size_of(page, block); + mi_heap_stat_decrease(heap, malloc, usize); +#endif + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal, bsize); +#if (MI_STAT > 1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); +#endif + } +} +#else +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + UNUSED(page); UNUSED(block); +} +#endif + +#if (MI_STAT>0) +// maintain stats for huge objects +static void mi_stat_huge_free(const mi_page_t* page) { + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` + if (bsize <= MI_HUGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, huge, bsize); + } + else { + mi_heap_stat_decrease(heap, giant, bsize); + } +} +#else +static void mi_stat_huge_free(const mi_page_t* page) { + UNUSED(page); +} +#endif + // ------------------------------------------------------ // Free // ------------------------------------------------------ @@ -300,16 +351,16 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // huge page segments are always abandoned and can be freed immediately mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { + mi_stat_huge_free(page); _mi_segment_huge_page_free(segment, page, block); return; } // Try to put the block on either the page-local thread free list, or the heap delayed free list. - mi_thread_free_t tfree; mi_thread_free_t tfreex; bool use_delayed; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { - tfree = mi_atomic_read_relaxed(&page->xthread_free); use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list @@ -320,31 +371,30 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = mi_page_heap(page); + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree; + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { - dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } // and reset the MI_DELAYED_FREEING flag + tfree = mi_atomic_load_relaxed(&page->xthread_free); do { - tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); + tfreex = tfree; mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } } - // regular free static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { @@ -384,62 +434,65 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) { mi_page_t* const page = _mi_segment_page_of(segment, p); mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); + mi_stat_free(page, block); _mi_free_block(page, local, block); } +// Get the segment data belonging to a pointer +// This is just a single `and` in assembly but does further checks in debug mode +// (and secure mode) if this was a valid pointer. +static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) +{ + UNUSED(msg); +#if (MI_DEBUG>0) + if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { + _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); + return NULL; + } +#endif + + mi_segment_t* const segment = _mi_ptr_segment(p); + if (mi_unlikely(segment == NULL)) return NULL; // checks also for (p==NULL) + +#if (MI_DEBUG>0) + if (mi_unlikely(!mi_is_in_heap_region(p))) { + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } + } +#endif +#if (MI_DEBUG>0 || MI_SECURE>=4) + if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { + _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); + } +#endif + return segment; +} + + // Free a block void mi_free(void* p) mi_attr_noexcept { -#if (MI_DEBUG>0) - if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { - _mi_error_message(EINVAL, "trying to free an invalid (unaligned) pointer: %p\n", p); - return; - } -#endif - - const mi_segment_t* const segment = _mi_ptr_segment(p); - if (mi_unlikely(segment == NULL)) return; // checks for (p==NULL) - -#if (MI_DEBUG!=0) - if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", p); - if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } - } -#endif -#if (MI_DEBUG!=0 || MI_SECURE>=4) - if
(mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { - _mi_error_message(EINVAL, "trying to free a pointer that does not point to a valid heap space: %p\n", p); - return; - } -#endif + const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); + if (mi_unlikely(segment == NULL)) return; const uintptr_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); mi_block_t* const block = (mi_block_t*)p; -#if (MI_STAT>1) - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); - mi_heap_stat_decrease(heap, malloc, bsize); - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { // huge page stats are accounted for in `_mi_page_retire` - mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], 1); - } -#endif - if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned if (mi_unlikely(mi_check_is_double_free(page,block))) return; mi_check_padding(page, block); + mi_stat_free(page, block); #if (MI_DEBUG!=0) memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; - page->used--; - if (mi_unlikely(mi_page_all_free(page))) { + if (mi_unlikely(--page->used == 0)) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) _mi_page_retire(page); } } @@ -473,9 +526,9 @@ bool _mi_free_delayed_block(mi_block_t* block) { } // Bytes available in a block -size_t mi_usable_size(const void* p) mi_attr_noexcept { - if (p==NULL) return 0; - const mi_segment_t* const segment = _mi_ptr_segment(p); +static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg); + if (segment==NULL) return 0; const mi_page_t* const page = _mi_segment_page_of(segment, p); const mi_block_t* block = (const mi_block_t*)p; if (mi_unlikely(mi_page_has_aligned(page))) { @@ -490,6 +543,10 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { } } +size_t mi_usable_size(const void* p) mi_attr_noexcept { + return _mi_usable_size(p, "mi_usable_size"); +} + // ------------------------------------------------------ // ensure explicit external inline definitions are emitted! 
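// ----------------------------------------------------------------------------
// Editor's illustration (not part of the patch): `mi_checked_ptr_segment`
// above notes that the segment lookup "is just a single `and` in assembly".
// That works because segments are MI_SEGMENT_SIZE-aligned, so masking the low
// bits of any interior pointer recovers the segment header. A sketch of the
// idea, assuming the 4MiB (2^22) segment size of the v1.x sources; the real
// helper is `_mi_ptr_segment`:

#include <stdint.h>

#define SEGMENT_SIZE ((uintptr_t)1 << 22)   // 4MiB, a power of two (assumption)
#define SEGMENT_MASK (SEGMENT_SIZE - 1)

typedef struct mi_segment_s mi_segment_t;   // opaque here

static inline mi_segment_t* ptr_segment_sketch(const void* p) {
  // a single AND; note that p==NULL maps to NULL, which is why the caller's
  // NULL test can piggyback on the segment check ("checks also for (p==NULL)")
  return (mi_segment_t*)((uintptr_t)p & ~SEGMENT_MASK);
}
// ----------------------------------------------------------------------------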
@@ -513,7 +570,7 @@ void* _mi_externs[] = { void mi_free_size(void* p, size_t size) mi_attr_noexcept { UNUSED_RELEASE(size); - mi_assert(p == NULL || size <= mi_usable_size(p)); + mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); mi_free(p); } @@ -553,14 +610,14 @@ mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { // Expand in place or fail void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { if (p == NULL) return NULL; - size_t size = mi_usable_size(p); + size_t size = _mi_usable_size(p,"mi_expand"); if (newsize > size) return NULL; return p; // it fits } void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) { if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero); - size_t size = mi_usable_size(p); + size_t size = _mi_usable_size(p,"mi_realloc"); if (newsize <= size && newsize >= (size / 2)) { return p; // reallocation still fits and not more than 50% waste } @@ -669,7 +726,7 @@ mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #ifndef PATH_MAX #define PATH_MAX MAX_PATH #endif -#include <Windows.h> +#include <windows.h> mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; @@ -733,7 +790,12 @@ but we call `exit` instead (i.e. not returning). #ifdef __cplusplus #include <new> static bool mi_try_new_handler(bool nothrow) { - std::new_handler h = std::get_new_handler(); + #if defined(_MSC_VER) || (__cplusplus >= 201103L) + std::new_handler h = std::get_new_handler(); + #else + std::new_handler h = std::set_new_handler(NULL); + std::set_new_handler(h); + #endif if (h==NULL) { if (!nothrow) throw std::bad_alloc(); return false; @@ -750,12 +812,12 @@ typedef void (*std_new_handler_t)(); std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv() { return NULL; } -std_new_handler_t mi_get_new_handler() { +static std_new_handler_t mi_get_new_handler() { return _ZSt15get_new_handlerv(); } #else // note: on windows we could dynamically link to `?get_new_handler@std@@YAP6AXXZXZ`. -std_new_handler_t mi_get_new_handler() { +static std_new_handler_t mi_get_new_handler() { return NULL; } #endif diff --git a/src/arena.c b/src/arena.c index bb9fc174..6e1526ac 100644 --- a/src/arena.c +++ b/src/arena.c @@ -30,12 +30,13 @@ of 256MiB in practice.
#include "mimalloc-atomic.h" #include // memset +#include // ENOMEM -#include "bitmap.inc.c" // atomic bitmap +#include "bitmap.h" // atomic bitmap // os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); void _mi_os_free(void* p, size_t size, mi_stats_t* stats); @@ -43,14 +44,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_sec void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (8*MI_SEGMENT_ALIGN) // 32MiB -#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB +#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 32MiB #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB #define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) @@ -61,12 +62,12 @@ typedef struct mi_arena_s { size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) int numa_node; // associated NUMA node bool is_zero_init; // is the arena zero initialized? - bool is_committed; // is the memory committed - bool is_large; // large OS page allocated - volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + bool is_committed; // is the memory fully committed? (if so, block_committed == NULL) + bool is_large; // large- or huge OS pages (always committed) + _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? 
- mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -104,16 +105,11 @@ static size_t mi_block_count_of_size(size_t size) { ----------------------------------------------------------- */ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { - const size_t fcount = arena->field_count; - size_t idx = mi_atomic_read(&arena->search_idx); // start from last search - for (size_t visited = 0; visited < fcount; visited++, idx++) { - if (idx >= fcount) idx = 0; // wrap around - // try to atomically claim a range of bits - if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) { - mi_atomic_write(&arena->search_idx, idx); // start search from here next time - return true; - } - } + size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search + if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { + mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time + return true; + }; return false; } @@ -123,16 +119,17 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) + bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) 
- void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_id_create(arena_index, bitmap_index); - *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *large = arena->is_large; + void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); + *memid = mi_arena_id_create(arena_index, bitmap_index); + *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + *large = arena->is_large; + *is_pinned = (arena->is_large || arena->is_committed); if (arena->is_committed) { // always committed *commit = true; @@ -140,7 +137,7 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n else if (*commit) { // arena not committed as a whole, but commit requested: ensure commit now bool any_uncommitted; - mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { bool commit_zero; _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); @@ -149,25 +146,25 @@ static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t n } else { // no need to commit, but check if already fully committed - *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, - bool* commit, bool* large, bool* is_zero, +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = MI_MEMID_OS; *is_zero = false; + *is_pinned = false; // try to allocate in an arena if the alignment is small enough // and the object is not too large or too small. if (alignment <= MI_SEGMENT_ALIGN && - size <= MI_ARENA_MAX_OBJ_SIZE && - size >= MI_ARENA_MIN_OBJ_SIZE) + size >= MI_ARENA_MIN_OBJ_SIZE && + mi_atomic_load_relaxed(&mi_arena_count) > 0) { const size_t bcount = mi_block_count_of_size(size); const int numa_node = _mi_os_numa_node(tld); // current numa node @@ -175,24 +172,24 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); // try numa affine allocation for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } } // try from another numa node instead.. 
for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena==NULL) break; // end reached if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); if (p != NULL) return p; } @@ -200,14 +197,17 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, } // finally, fall back to the OS + if (mi_option_is_enabled(mi_option_limit_os_alloc)) return NULL; *is_zero = true; - *memid = MI_MEMID_OS; - return _mi_os_alloc_aligned(size, alignment, *commit, large, tld); + *memid = MI_MEMID_OS; + void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); + if (p != NULL) *is_pinned = *large; + return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld); } /* ----------------------------------------------------------- @@ -228,8 +228,10 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s size_t bitmap_idx; mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); + const size_t blocks = mi_block_count_of_size(size); + // checks if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; @@ -239,9 +241,18 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } - const size_t blocks = mi_block_count_of_size(size); - bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); - if (!ones) { + // potentially decommit + if (arena->is_committed) { + mi_assert_internal(all_committed); + } + else { + mi_assert_internal(arena->blocks_committed != NULL); + _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } + // and make it available to others again + bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); + if (!all_inuse) { _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); return; }; @@ -254,24 +265,76 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_s static bool mi_arena_add(mi_arena_t* arena) { mi_assert_internal(arena != NULL); - mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % 
MI_SEGMENT_ALIGN == 0); + mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - uintptr_t i = mi_atomic_increment(&mi_arena_count); + uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement(&mi_arena_count); + mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } - mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena); + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); return true; } +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept +{ + if (is_large) { + mi_assert_internal(is_committed); + is_committed = true; + } + + const size_t bcount = mi_block_count_of_size(size); + const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); + const size_t bitmaps = (is_committed ? 2 : 3); + const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); + mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + if (arena == NULL) return false; + + arena->block_count = bcount; + arena->field_count = fields; + arena->start = (uint8_t*)start; + arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) + arena->is_large = is_large; + arena->is_zero_init = is_zero; + arena->is_committed = is_committed; + arena->search_idx = 0; + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap + arena->blocks_committed = (is_committed ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap + // the bitmaps are already zero initialized due to os_alloc + // just claim leftover blocks if needed + ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; + mi_assert_internal(post >= 0); + if (post > 0) { + // don't use leftover bits at the end + mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); + _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); + } + + mi_arena_add(arena); + return true; +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept +{ + size = _mi_os_good_alloc_size(size); + bool large = allow_large; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); + if (start==NULL) return ENOMEM; + if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) { + _mi_os_free_ex(start, size, commit, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size,1024)); + return ENOMEM; + } + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : ""); + return 0; +} + /* ----------------------------------------------------------- Reserve a huge page arena.
----------------------------------------------------------- */

-#include <errno.h> // ENOMEM
-
 // reserve at a specific numa node
 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
   if (pages==0) return 0;
@@ -286,35 +349,10 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec
   }
   _mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages);

-  size_t bcount = mi_block_count_of_size(hsize);
-  size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
-  size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
-  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
-  if (arena == NULL) {
+  if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) {
     _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
     return ENOMEM;
   }
-  arena->block_count = bcount;
-  arena->field_count = fields;
-  arena->start = (uint8_t*)p;
-  arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
-  arena->is_large = true;
-  arena->is_zero_init = true;
-  arena->is_committed = true;
-  arena->search_idx = 0;
-  arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
-  arena->blocks_committed = NULL;
-  // the bitmaps are already zero initialized due to os_alloc
-  // just claim leftover blocks if needed
-  ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
-  mi_assert_internal(post >= 0);
-  if (post > 0) {
-    // don't use leftover bits at the end
-    mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
-    mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
-  }
-
-  mi_arena_add(arena);
   return 0;
 }
diff --git a/src/bitmap.c b/src/bitmap.c
new file mode 100644
index 00000000..68ae3b2e
--- /dev/null
+++ b/src/bitmap.c
@@ -0,0 +1,395 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019,2020 Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+Concurrent bitmap that can set/reset sequences of bits atomically,
+represented as an array of fields where each field is a machine word (`uintptr_t`).
+
+There are two APIs: the standard one cannot have sequences that cross
+between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS);
+this is used in region allocation.
+
+The `_across` postfixed functions do allow sequences that can cross over
+between the fields. (This is used in arena allocation.)
+---------------------------------------------------------------------------- */
+
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "bitmap.h"
+
+/* -----------------------------------------------------------
+  Bitmap definition
+----------------------------------------------------------- */
+
+// The bit mask for a given number of blocks at a specified bit index.
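
// Worked example (illustrative sketch, not part of the patch): with 64-bit
// fields, a mask of count==3 bits at bitidx==5 is ((1 << 3) - 1) << 5 == 0xE0,
// i.e. 0b11100000. A self-contained version of the same formula; the helper
// name `example_mask_of` is hypothetical:
#include <stddef.h>
#include <stdint.h>
static inline uintptr_t example_mask_of(size_t count, size_t bitidx) {
  return (((uintptr_t)1 << count) - 1) << bitidx;  // `count` one-bits starting at `bitidx`
}
// example_mask_of(3, 5) == 0xE0 and example_mask_of(1, 0) == 0x1.
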
+static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { + mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); + mi_assert_internal(count > 0); + if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; + if (count == 0) return 0; + return ((((uintptr_t)1 << count) - 1) << bitidx); +} + + + +/* ----------------------------------------------------------- + Claim a bit sequence atomically +----------------------------------------------------------- */ + +// Try to atomically claim a sequence of `count` bits in a single +// field at `idx` in `bitmap`. Returns `true` on success. +bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); + _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_load_relaxed(field); + if (map==MI_BITMAP_FIELD_FULL) return false; // short cut + + // search for 0-bit sequence of length count + const uintptr_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + +#ifdef MI_HAVE_FAST_BITSCAN + size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible +#else + size_t bitidx = 0; // otherwise start at 0 +#endif + uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + + // scan linearly for a free range of zero bits + while (bitidx <= bitidx_max) { + const uintptr_t mapm = map & m; + if (mapm == 0) { // are the mask bits free at bitidx? + mi_assert_internal((m >> bitidx) == mask); // no overflow? + const uintptr_t newmap = map | m; + mi_assert_internal((newmap^map) >> bitidx == mask); + if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? + // no success, another thread claimed concurrently.. keep going (with updated `map`) + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_FAST_BITSCAN + const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + const size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. +bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +/* +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); +} +*/ + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. 
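
// Illustrative sketch (not part of the patch): the claim/unclaim pair below
// relies on atomic fetch-or/fetch-and returning the *previous* value, so a
// double-claim or double-free is detected without a separate load. In plain
// C11 (the `example_*` names are hypothetical):
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
static inline bool example_claim(_Atomic(uintptr_t)* field, uintptr_t mask) {
  uintptr_t prev = atomic_fetch_or(field, mask);
  return ((prev & mask) == 0);       // true: every bit in `mask` was still free
}
static inline bool example_unclaim(_Atomic(uintptr_t)* field, uintptr_t mask) {
  uintptr_t prev = atomic_fetch_and(field, ~mask);
  return ((prev & mask) == mask);    // true: every bit in `mask` was still set
}
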
+bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == mask); + uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + return ((prev & mask) == mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); + uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + return ((prev & mask) == 0); +} + +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. +static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); +} + +bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. This is used in arena allocation +//-------------------------------------------------------------------------- + +// Try to atomically claim a sequence of `count` bits starting from the field +// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. 
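
// Illustrative sketch (not part of the patch): why crossing matters. With
// 64-bit fields, a run of 8 bits starting at bitmap index 60 covers bits
// 60..63 of field 0 and bits 0..3 of field 1 -- a range the single-field
// functions above can never return. The split is computed like this (the
// name is hypothetical; 64 stands in for MI_BITMAP_FIELD_BITS):
#include <stddef.h>
static inline size_t example_bits_in_first_field(size_t bitmap_idx, size_t count) {
  size_t bit      = bitmap_idx % 64;             // bit offset inside the first field (60)
  size_t in_first = 64 - bit;                    // room left in that field (4)
  return (count < in_first ? count : in_first);  // the rest spills into the next field
}
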
+static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) +{ + mi_assert_internal(bitmap_idx != NULL); + + // check initial trailing zeros + _Atomic(uintptr_t)* field = &bitmap[idx]; + uintptr_t map = mi_atomic_load_relaxed(field); + const size_t initial = mi_clz(map); // count of initial zeros starting at idx + mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); + if (initial == 0) return false; + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries + + // scan ahead + size_t found = initial; + uintptr_t mask = 0; // mask bits for the final field + while(found < count) { + field++; + map = mi_atomic_load_relaxed(field); + const uintptr_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mask = mi_bitmap_mask_(mask_bits, 0); + if ((map & mask) != 0) return false; + found += mask_bits; + } + mi_assert_internal(field < &bitmap[bitmap_fields]); + + // found range of zeros up to the final field; mask contains mask in the final field + // now claim it atomically + _Atomic(uintptr_t)* const final_field = field; + const uintptr_t final_mask = mask; + _Atomic(uintptr_t)* const initial_field = &bitmap[idx]; + const uintptr_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + + // initial field + uintptr_t newmap; + field = initial_field; + map = mi_atomic_load_relaxed(field); + do { + newmap = map | initial_mask; + if ((map & initial_mask) != 0) { goto rollback; }; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // intermediate fields + while (++field < final_field) { + newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); + map = 0; + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } + } + + // final field + mi_assert_internal(field == final_field); + map = mi_atomic_load_relaxed(field); + do { + newmap = map | final_mask; + if ((map & final_mask) != 0) { goto rollback; } + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + + // claimed! + *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + return true; + +rollback: + // roll back intermediate fields + while (--field > initial_field) { + newmap = 0; + map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); + mi_assert_internal(mi_atomic_load_relaxed(field) == map); + mi_atomic_store_release(field, newmap); + } + if (field == initial_field) { + map = mi_atomic_load_relaxed(field); + do { + mi_assert_internal((map & initial_mask) == initial_mask); + newmap = map & ~initial_mask; + } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); + } + // retry? (we make a recursive call instead of goto to be able to use const declarations) + if (retries < 4) { + return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); + } + else { + return false; + } +} + + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. 
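
// Illustrative sketch (not part of the patch): the cross-field claim above is
// a three-phase protocol -- CAS the partial initial field, CAS each
// intermediate field from all-zero to all-one, CAS the partial final field --
// and any conflict rolls the earlier fields back (the real code then retries
// up to 4 times; this miniature just fails). A single-threaded miniature with
// two fields; all `example_*` names are hypothetical:
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static bool example_claim_across(_Atomic(uintptr_t) f[2], uintptr_t m0, uintptr_t m1) {
  uintptr_t e0 = atomic_load(&f[0]);
  do {
    if ((e0 & m0) != 0) return false;                    // initial field already taken
  } while (!atomic_compare_exchange_weak(&f[0], &e0, e0 | m0));
  uintptr_t e1 = atomic_load(&f[1]);
  do {
    if ((e1 & m1) != 0) {                                // conflict: roll back field 0
      atomic_fetch_and(&f[0], ~m0);
      return false;
    }
  } while (!atomic_compare_exchange_weak(&f[1], &e1, e1 | m1));
  return true;                                           // both partial fields claimed
}

int main(void) {
  _Atomic(uintptr_t) f[2] = { 0, 0 };
  assert( example_claim_across(f, (uintptr_t)0xF << 60, 0xF));  // bits 60..63 + 0..3
  assert(!example_claim_across(f, (uintptr_t)0xF << 60, 0xF));  // now already claimed
  return 0;
}
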
+bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { + mi_assert_internal(count > 0); + if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + // try to claim inside the field + if (count <= MI_BITMAP_FIELD_BITS) { + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + // try to claim across fields + if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { + return true; + } + } + return false; +} + +// Helper for masks across fields; returns the mid count, post_mask may be 0 +static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, uintptr_t* pre_mask, uintptr_t* mid_mask, uintptr_t* post_mask) { + UNUSED_RELEASE(bitmap_fields); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { + *pre_mask = mi_bitmap_mask_(count, bitidx); + *mid_mask = 0; + *post_mask = 0; + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) < bitmap_fields); + return 0; + } + else { + const size_t pre_bits = MI_BITMAP_FIELD_BITS - bitidx; + mi_assert_internal(pre_bits < count); + *pre_mask = mi_bitmap_mask_(pre_bits, bitidx); + count -= pre_bits; + const size_t mid_count = (count / MI_BITMAP_FIELD_BITS); + *mid_mask = MI_BITMAP_FIELD_FULL; + count %= MI_BITMAP_FIELD_BITS; + *post_mask = (count==0 ? 0 : mi_bitmap_mask_(count, 0)); + mi_assert_internal(mi_bitmap_index_field(bitmap_idx) + mid_count + (count==0 ? 0 : 1) < bitmap_fields); + return mid_count; + } +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + size_t idx = mi_bitmap_index_field(bitmap_idx); + uintptr_t pre_mask; + uintptr_t mid_mask; + uintptr_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + bool all_one = true; + _Atomic(uintptr_t)*field = &bitmap[idx]; + uintptr_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + if ((prev & pre_mask) != pre_mask) all_one = false; + while(mid_count-- > 0) { + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + if ((prev & mid_mask) != mid_mask) all_one = false; + } + if (post_mask!=0) { + prev = mi_atomic_and_acq_rel(field, ~post_mask); + if ((prev & post_mask) != post_mask) all_one = false; + } + return all_one; +} + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
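
// Worked example (not part of the patch): mi_bitmap_mask_across above splits
// a run into a partial first field, whole middle fields, and a partial last
// field. With 64-bit fields, bit offset 60 and count 70 give pre = 4 bits,
// one full mid field, and post = 2 bits (4 + 64 + 2 == 70); the demo name is
// hypothetical:
#include <assert.h>
#include <stddef.h>
static inline void example_mask_across_split(void) {
  const size_t field_bits = 64, bitidx = 60, count = 70;
  size_t pre  = field_bits - bitidx;         // 4 bits left in the first field
  size_t mid  = (count - pre) / field_bits;  // 1 whole middle field
  size_t post = (count - pre) % field_bits;  // 2 bits in the last field
  assert(pre + mid*field_bits + post == count);
}
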
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
+  size_t idx = mi_bitmap_index_field(bitmap_idx);
+  uintptr_t pre_mask;
+  uintptr_t mid_mask;
+  uintptr_t post_mask;
+  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+  bool all_zero = true;
+  bool any_zero = false;
+  _Atomic(uintptr_t)*field = &bitmap[idx];
+  uintptr_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
+  if ((prev & pre_mask) != 0) all_zero = false;
+  if ((prev & pre_mask) != pre_mask) any_zero = true;
+  while (mid_count-- > 0) {
+    prev = mi_atomic_or_acq_rel(field++, mid_mask);
+    if ((prev & mid_mask) != 0) all_zero = false;
+    if ((prev & mid_mask) != mid_mask) any_zero = true;
+  }
+  if (post_mask!=0) {
+    prev = mi_atomic_or_acq_rel(field, post_mask);
+    if ((prev & post_mask) != 0) all_zero = false;
+    if ((prev & post_mask) != post_mask) any_zero = true;
+  }
+  if (pany_zero != NULL) *pany_zero = any_zero;
+  return all_zero;
+}
+
+
+// Returns `true` if all `count` bits were 1.
+// `any_ones` is `true` if there was at least one bit set to one.
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
+  size_t idx = mi_bitmap_index_field(bitmap_idx);
+  uintptr_t pre_mask;
+  uintptr_t mid_mask;
+  uintptr_t post_mask;
+  size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+  bool all_ones = true;
+  bool any_ones = false;
+  _Atomic(uintptr_t)* field = &bitmap[idx];
+  uintptr_t prev = mi_atomic_load_relaxed(field++);
+  if ((prev & pre_mask) != pre_mask) all_ones = false;
+  if ((prev & pre_mask) != 0) any_ones = true;
+  while (mid_count-- > 0) {
+    prev = mi_atomic_load_relaxed(field++);
+    if ((prev & mid_mask) != mid_mask) all_ones = false;
+    if ((prev & mid_mask) != 0) any_ones = true;
+  }
+  if (post_mask!=0) {
+    prev = mi_atomic_load_relaxed(field);
+    if ((prev & post_mask) != post_mask) all_ones = false;
+    if ((prev & post_mask) != 0) any_ones = true;
+  }
+  if (pany_ones != NULL) *pany_ones = any_ones;
+  return all_ones;
+}
+
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+}
+
+bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+  bool any_ones;
+  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+  return any_ones;
+}
diff --git a/src/bitmap.h b/src/bitmap.h
new file mode 100644
index 00000000..f7819803
--- /dev/null
+++ b/src/bitmap.h
@@ -0,0 +1,102 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019,2020 Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+Concurrent bitmap that can set/reset sequences of bits atomically,
+represented as an array of fields where each field is a machine word (`uintptr_t`).
+
+There are two APIs: the standard one cannot have sequences that cross
+between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS);
+this is used in region allocation.
+
+The `_across` postfixed functions do allow sequences that can cross over
+between the fields. (This is used in arena allocation.)
+---------------------------------------------------------------------------- */
+#pragma once
+#ifndef MI_BITMAP_H
+#define MI_BITMAP_H
+
+/* -----------------------------------------------------------
+  Bitmap definition
+----------------------------------------------------------- */
+
+#define MI_BITMAP_FIELD_BITS  (8*MI_INTPTR_SIZE)
+#define MI_BITMAP_FIELD_FULL  (~((uintptr_t)0))  // all bits set
+
+// An atomic bitmap of `uintptr_t` fields
+typedef _Atomic(uintptr_t) mi_bitmap_field_t;
+typedef mi_bitmap_field_t* mi_bitmap_t;
+
+// A bitmap index is the index of the bit in a bitmap.
+typedef size_t mi_bitmap_index_t;
+
+// Create a bit index.
+static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
+  mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
+  return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
+}
+
+// Get the field index from a bit index.
+static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
+  return (bitmap_idx / MI_BITMAP_FIELD_BITS);
+}
+
+// Get the bit index in a bitmap field
+static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
+  return (bitmap_idx % MI_BITMAP_FIELD_BITS);
+}
+
+// Get the full bit index
+static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
+  return bitmap_idx;
+}
+
+/* -----------------------------------------------------------
+  Claim a bit sequence atomically
+----------------------------------------------------------- */
+
+// Try to atomically claim a sequence of `count` bits in a single
+// field at `idx` in `bitmap`. Returns `true` on success.
+bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
+
+// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
+// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
+bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
+
+// Set `count` bits at `bitmap_idx` to 0 atomically
+// Returns `true` if all `count` bits were 1 previously.
+bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+
+// Set `count` bits at `bitmap_idx` to 1 atomically
+// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
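
// Usage sketch (not part of the patch; a hypothetical caller): the claim
// functions hand back a bitmap index that then feeds the query and unclaim
// calls. On a 64-bit platform, 4 fields give a 256-bit bitmap:
static inline void example_bitmap_usage(void) {
  mi_bitmap_field_t bm[4] = { 0 };
  mi_bitmap_index_t idx;
  if (_mi_bitmap_try_find_from_claim(bm, 4, 0, 10, &idx)) {  // claim 10 free bits
    mi_assert_internal(_mi_bitmap_is_claimed(bm, 4, 10, idx));
    mi_bitmap_unclaim(bm, 4, 10, idx);                       // and release them again
  }
}
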
+bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); + +bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + + +//-------------------------------------------------------------------------- +// the `_across` functions work on bitmaps where sequences can cross over +// between the fields. This is used in arena allocation +//-------------------------------------------------------------------------- + +// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. +// Starts at idx, and wraps around to search in all `bitmap_fields` fields. +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); + +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +#endif diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c deleted file mode 100644 index c3813a44..00000000 --- a/src/bitmap.inc.c +++ /dev/null @@ -1,240 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This file is meant to be included in other files for efficiency. -It implements a bitmap that can set/reset sequences of bits atomically -and is used to concurrently claim memory ranges. - -A bitmap is an array of fields where each field is a machine word (`uintptr_t`) - -A current limitation is that the bit sequences cannot cross fields -and that the sequence must be smaller or equal to the bits in a field. ----------------------------------------------------------------------------- */ -#pragma once -#ifndef MI_BITMAP_C -#define MI_BITMAP_C - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -/* ----------------------------------------------------------- - Bitmap definition ------------------------------------------------------------ */ - -#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) -#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set - -// An atomic bitmap of `uintptr_t` fields -typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t; -typedef mi_bitmap_field_t* mi_bitmap_t; - -// A bitmap index is the index of the bit in a bitmap. 
-typedef size_t mi_bitmap_index_t; - -// Create a bit index. -static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) { - mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS); - return (idx*MI_BITMAP_FIELD_BITS) + bitidx; -} - -// Get the field index from a bit index. -static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) { - return (bitmap_idx / MI_BITMAP_FIELD_BITS); -} - -// Get the bit index in a bitmap field -static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) { - return (bitmap_idx % MI_BITMAP_FIELD_BITS); -} - -// Get the full bit index -static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) { - return bitmap_idx; -} - - -// The bit mask for a given number of blocks at a specified bit index. -static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { - mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); - if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; - return ((((uintptr_t)1 << count) - 1) << bitidx); -} - - -/* ----------------------------------------------------------- - Use bit scan forward/reverse to quickly find the first zero bit if it is available ------------------------------------------------------------ */ -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanForward)(&idx, x); - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - MI_64(_BitScanReverse)(&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#include // LONG_MAX -#define MI_HAVE_BITSCAN -#if (INTPTR_MAX == LONG_MAX) -# define MI_L(x) x##l -#else -# define MI_L(x) x##ll -#endif -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x)); -} -#endif - -/* ----------------------------------------------------------- - Claim a bit sequence atomically ------------------------------------------------------------ */ - -// Try to atomically claim a sequence of `count` bits at in `idx` -// in the bitmap field. Returns `true` on success. -static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS); - - uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); - if ((field & mask) == 0) { // free? - if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) { - // claimed! - return true; - } - } - return false; -} - - -// Try to atomically claim a sequence of `count` bits in a single -// field at `idx` in `bitmap`. Returns `true` on success. 
-static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx) -{ - mi_assert_internal(bitmap_idx != NULL); - volatile _Atomic(uintptr_t)* field = &bitmap[idx]; - uintptr_t map = mi_atomic_read(field); - if (map==MI_BITMAP_FIELD_FULL) return false; // short cut - - // search for 0-bit sequence of length count - const uintptr_t mask = mi_bitmap_mask_(count, 0); - const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; - -#ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible -#else - size_t bitidx = 0; // otherwise start at 0 -#endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while (bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - const uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(field); - continue; - } - else { - // success, we claimed the bits! - *bitmap_idx = mi_bitmap_index_create(idx, bitidx); - return true; - } - } - else { - // on to the next bit range -#ifdef MI_HAVE_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= count); -#else - const size_t shift = 1; -#endif - bitidx += shift; - m <<= shift; - } - } - // no bits found - return false; -} - - -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { - for (size_t idx = 0; idx < bitmap_fields; idx++) { - if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { - return true; - } - } - return false; -} - -// Set `count` bits at `bitmap_idx` to 0 atomically -// Returns `true` if all `count` bits were 1 previously. -static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - // mi_assert_internal((bitmap[idx] & mask) == mask); - uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); - return ((prev & mask) == mask); -} - - -// Set `count` bits at `bitmap_idx` to 1 atomically -// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
-static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); - uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); - return ((prev & mask) == 0); -} - -// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. -static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { - const size_t idx = mi_bitmap_index_field(bitmap_idx); - const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); - return ((field & mask) == mask); -} - -static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); -} - -static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - bool any_ones; - mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); - return any_ones; -} - - -#endif diff --git a/src/heap.c b/src/heap.c index 5d0d4b8a..54562d10 100644 --- a/src/heap.c +++ b/src/heap.c @@ -11,6 +11,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // memset, memcpy +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#pragma warning(disable:4204) // non-constant aggregate initializer +#endif /* ----------------------------------------------------------- Helpers @@ -111,7 +114,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { - if (!mi_heap_is_initialized(heap)) return; + if (heap==NULL || !mi_heap_is_initialized(heap)) return; _mi_deferred_free(heap, collect >= MI_FORCE); // note: never reclaim on collect but leave it to threads that need storage to reclaim @@ -128,7 +131,6 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_abandoned_reclaim_all(heap, &heap->tld->segments); } - // if abandoning, mark all pages to no longer add to delayed_free if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); @@ -143,19 +145,17 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); + mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); // collect segment caches if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } - #ifndef NDEBUG - // collect regions + // collect regions on program-exit (or shared library unload) if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { _mi_mem_collect(&heap->tld->os); } - #endif } void _mi_heap_collect_abandon(mi_heap_t* heap) { @@ -213,6 +213,7 @@ uintptr_t _mi_heap_random_next(mi_heap_t* heap) { // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { + mi_assert_internal(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); // TODO: copy full empty heap instead? memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); @@ -228,6 +229,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { static void mi_heap_free(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (mi_heap_is_backing(heap)) return; // dont free the backing heap // reset default @@ -272,17 +274,20 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ const size_t bsize = mi_page_block_size(page); if (bsize > MI_LARGE_OBJ_SIZE_MAX) { if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant, bsize); + mi_heap_stat_decrease(heap, giant, bsize); } else { - _mi_stat_decrease(&heap->tld->stats.huge, bsize); + mi_heap_stat_decrease(heap, huge, bsize); } } -#if (MI_STAT>1) +#if (MI_STAT) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse); + mi_heap_stat_decrease(heap, normal, bsize * inuse); +#if (MI_STAT>1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); +#endif } mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... 
#endif @@ -310,7 +315,7 @@ void mi_heap_destroy(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); mi_assert(heap->no_reclaim); mi_assert_expensive(mi_heap_is_valid(heap)); - if (!mi_heap_is_initialized(heap)) return; + if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (!heap->no_reclaim) { // don't free in case it may contain reclaimed pages mi_heap_delete(heap); @@ -354,7 +359,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a // the regular `_mi_free_delayed_block` which is safe. _mi_heap_delayed_free(from); - mi_assert_internal(from->thread_delayed_free == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); // and reset the `from` heap mi_heap_reset_pages(from); @@ -366,7 +371,7 @@ void mi_heap_delete(mi_heap_t* heap) mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); mi_assert_expensive(mi_heap_is_valid(heap)); - if (!mi_heap_is_initialized(heap)) return; + if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (!mi_heap_is_backing(heap)) { // tranfer still used pages to the backing heap @@ -381,8 +386,9 @@ void mi_heap_delete(mi_heap_t* heap) } mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { + mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); - if (!mi_heap_is_initialized(heap)) return NULL; + if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); mi_heap_t* old = mi_get_default_heap(); _mi_heap_set_default_direct(heap); @@ -408,7 +414,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { mi_assert(heap != NULL); - if (!mi_heap_is_initialized(heap)) return false; + if (heap==NULL || !mi_heap_is_initialized(heap)) return false; return (heap == mi_heap_of_block(p)); } @@ -426,7 +432,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { mi_assert(heap != NULL); - if (!mi_heap_is_initialized(heap)) return false; + if (heap==NULL || !mi_heap_is_initialized(heap)) return false; if (((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) return false; // only aligned pointers bool found = false; mi_heap_visit_pages(heap, &mi_heap_page_check_owned, (void*)p, &found); diff --git a/src/init.c b/src/init.c index 132043e8..6948039f 100644 --- a/src/init.c +++ b/src/init.c @@ -73,8 +73,8 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() @@ -105,10 +105,6 @@ const mi_heap_t _mi_heap_empty = { // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; - -#define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) -#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) - extern mi_heap_t _mi_heap_main; static mi_tld_t tld_main = { @@ -116,9 +112,9 @@ static mi_tld_t tld_main = { &_mi_heap_main, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, 0, 0, 0, 0, 0, 0, NULL, - tld_main_stats, tld_main_os + &tld_main.stats, &tld_main.os }, // 
segments - { 0, tld_main_stats }, // os + { 0, &tld_main.stats }, // os { MI_STATS_NULL } // stats }; @@ -180,10 +176,15 @@ static bool _mi_heap_init(void) { } else { // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation? + mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); // Todo: more efficient allocation? if (td == NULL) { - _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); - return false; + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + return false; + } } // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; @@ -200,7 +201,7 @@ static bool _mi_heap_init(void) { tld->segments.stats = &tld->stats; tld->segments.os = &tld->os; tld->os.stats = &tld->stats; - _mi_heap_set_default_direct(heap); + _mi_heap_set_default_direct(heap); } return false; } @@ -234,9 +235,8 @@ static bool _mi_heap_done(mi_heap_t* heap) { _mi_heap_collect_abandon(heap); } - // merge stats - _mi_stats_done(&heap->tld->stats); + _mi_stats_done(&heap->tld->stats); // free if not the main thread if (heap != &_mi_heap_main) { @@ -284,7 +284,7 @@ static void _mi_thread_done(mi_heap_t* default_heap); // nothing to do as it is done in DllMain #elif defined(_WIN32) && !defined(MI_SHARED_LIB) // use thread local storage keys to detect thread ending - #include + #include #include #if (_WIN32_WINNT < 0x600) // before Windows Vista WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); @@ -336,17 +336,13 @@ void mi_thread_init(void) mi_attr_noexcept { // ensure our process has started already mi_process_init(); - + // initialize the thread local default heap // (this will call `_mi_heap_set_default_direct` and thus set the // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_heap_init()) return; // returns true if already initialized - // don't further initialize for the main thread - if (_mi_is_main_thread()) return; - - _mi_stat_increase(&mi_get_default_heap()->tld->stats.threads, 1); - + _mi_stat_increase(&_mi_stats_main.threads, 1); //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } @@ -355,14 +351,11 @@ void mi_thread_done(void) mi_attr_noexcept { } static void _mi_thread_done(mi_heap_t* heap) { + _mi_stat_decrease(&_mi_stats_main.threads, 1); + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->thread_id != _mi_thread_id()) return; - - // stats - if (!_mi_is_main_thread() && mi_heap_is_initialized(heap)) { - _mi_stat_decrease(&heap->tld->stats.threads, 1); - } - + // abandon the thread local heap if (_mi_heap_done(heap)) return; // returns true if already ran } @@ -403,11 +396,11 @@ static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. 
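
// Illustrative sketch (not part of the patch): on pthread platforms the same
// "run a callback at thread exit" effect that FLS keys give on Windows comes
// from a TLS key with a destructor; storing any non-NULL value arms it (the
// `example_*` names are hypothetical):
#include <pthread.h>
static pthread_key_t example_done_key;
static void example_on_thread_exit(void* heap) {
  (void)heap;  // tear down the thread-local heap here
}
static void example_process_init(void) {
  pthread_key_create(&example_done_key, &example_on_thread_exit);
}
static void example_thread_attach(void* heap) {
  pthread_setspecific(example_done_key, heap);  // non-NULL => destructor runs at thread exit
}
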
-bool _mi_preloading() { +bool _mi_preloading(void) { return os_preloading; } -bool mi_is_redirected() mi_attr_noexcept { +bool mi_is_redirected(void) mi_attr_noexcept { return mi_redirected; } @@ -429,7 +422,7 @@ mi_decl_export void _mi_redirect_entry(DWORD reason) { } } __declspec(dllimport) bool mi_allocator_init(const char** message); -__declspec(dllimport) void mi_allocator_done(); +__declspec(dllimport) void mi_allocator_done(void); #ifdef __cplusplus } #endif @@ -438,7 +431,7 @@ static bool mi_allocator_init(const char** message) { if (message != NULL) *message = NULL; return true; } -static void mi_allocator_done() { +static void mi_allocator_done(void) { // nothing to do } #endif @@ -485,6 +478,10 @@ void mi_process_init(void) mi_attr_noexcept { if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + } + if (mi_option_is_enabled(mi_option_reserve_os_memory)) { + long ksize = mi_option_get(mi_option_reserve_os_memory); + if (ksize > 0) mi_reserve_os_memory((size_t)ksize*KiB, true, true); } } @@ -501,11 +498,15 @@ static void mi_process_done(void) { FlsSetValue(mi_fls_key, NULL); // don't call main-thread callback FlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208 #endif - #ifndef NDEBUG - mi_collect(true); + + #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) + // free all memory if possible on process exit. This is not needed for a stand-alone process + // but should be done if mimalloc is statically linked into another shared library which + // is repeatedly loaded/unloaded, see issue #281. + mi_collect(true /* force */ ); #endif - if (mi_option_is_enabled(mi_option_show_stats) || - mi_option_is_enabled(mi_option_verbose)) { + + if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } mi_allocator_done(); diff --git a/src/options.c b/src/options.c index 1a4633ee..c489c65e 100644 --- a/src/options.c +++ b/src/options.c @@ -14,7 +14,13 @@ terms of the MIT license. A copy of the license can be found in the file #include // toupper #include -static uintptr_t mi_max_error_count = 16; // stop outputting errors after this +#ifdef _MSC_VER +#pragma warning(disable:4996) // strncpy, strncat +#endif + + +static uintptr_t mi_max_error_count = 16; // stop outputting errors after this +static uintptr_t mi_max_warning_count = 16; // stop outputting warnings after this static void mi_add_stderr_output(); @@ -60,7 +66,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory @@ -69,7 +75,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + { 0, UNINIT, MI_OPTION(reserve_os_memory) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates @@ -77,12 +84,15 @@ static mi_option_desc_t options[_mi_option_last] = #if defined(__NetBSD__) { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output + }; static void mi_option_init(mi_option_desc_t* desc); @@ -100,6 +110,7 @@ void _mi_options_init(void) { } } mi_max_error_count = mi_option_get(mi_option_max_errors); + mi_max_warning_count = mi_option_get(mi_option_max_warnings); } long mi_option_get(mi_option_t option) { @@ -165,7 +176,7 @@ static void mi_out_stderr(const char* msg, void* arg) { // an output function is registered it is called immediately with // the output up to that point. #ifndef MI_MAX_DELAY_OUTPUT -#define MI_MAX_DELAY_OUTPUT (32*1024) +#define MI_MAX_DELAY_OUTPUT ((uintptr_t)(32*1024)) #endif static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; @@ -173,11 +184,11 @@ static _Atomic(uintptr_t) out_len; static void mi_out_buf(const char* msg, void* arg) { UNUSED(arg); if (msg==NULL) return; - if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; + if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_add(&out_len, n); + uintptr_t start = mi_atomic_add_acq_rel(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -189,7 +200,7 @@ static void mi_out_buf(const char* msg, void* arg) { static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_add(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + size_t count = mi_atomic_add_acq_rel(&out_len, (no_more_buf ? 
MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; @@ -215,19 +226,18 @@ static void mi_out_buf_stderr(const char* msg, void* arg) { // Should be atomic but gives errors on many platforms as generally we cannot cast a function pointer to a uintptr_t. // For now, don't register output from multiple threads. -#pragma warning(suppress:4180) static mi_output_fun* volatile mi_out_default; // = NULL -static volatile _Atomic(void*) mi_out_arg; // = NULL +static _Atomic(void*) mi_out_arg; // = NULL static mi_output_fun* mi_out_get_default(void** parg) { - if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } + if (parg != NULL) { *parg = mi_atomic_load_ptr_acquire(void,&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - mi_atomic_write_ptr(void,&mi_out_arg, arg); + mi_atomic_store_ptr_release(void,&mi_out_arg, arg); if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } @@ -241,14 +251,15 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings +static _Atomic(uintptr_t) error_count; // = 0; // when >= max_error_count stop emitting errors +static _Atomic(uintptr_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation // inside the C runtime causes another message. static mi_decl_thread bool recurse = false; static bool mi_recurse_enter(void) { - #ifdef MI_TLS_RECURSE_GUARD + #if defined(__MACH__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return true; #endif if (recurse) return false; @@ -257,7 +268,7 @@ static bool mi_recurse_enter(void) { } static void mi_recurse_exit(void) { - #ifdef MI_TLS_RECURSE_GUARD + #if defined(__MACH__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif recurse = false; @@ -313,13 +324,13 @@ void _mi_verbose_message(const char* fmt, ...) { static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment(&error_count) > mi_max_error_count) return; + if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) 
{
  if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
-  if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
+  if (mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return;
  va_list args;
  va_start(args,fmt);
  mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args);
@@ -339,7 +350,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co
// --------------------------------------------------------

static mi_error_fun* volatile mi_error_handler; // = NULL
-static volatile _Atomic(void*) mi_error_arg;    // = NULL
+static _Atomic(void*) mi_error_arg;             // = NULL

static void mi_error_default(int err) {
  UNUSED(err);
@@ -365,7 +376,7 @@ static void mi_error_default(int err) {

void mi_register_error(mi_error_fun* fun, void* arg) {
  mi_error_handler = fun;  // can be NULL
-  mi_atomic_write_ptr(void,&mi_error_arg, arg);
+  mi_atomic_store_ptr_release(void,&mi_error_arg, arg);
}

void _mi_error_message(int err, const char* fmt, ...) {
@@ -376,7 +387,7 @@ void _mi_error_message(int err, const char* fmt, ...) {
  va_end(args);
  // and call the error handler which may abort (or return normally)
  if (mi_error_handler != NULL) {
-    mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg));
+    mi_error_handler(err, mi_atomic_load_ptr_acquire(void,&mi_error_arg));
  }
  else {
    mi_error_default(err);
@@ -389,33 +400,73 @@ void _mi_error_message(int err, const char* fmt, ...) {

static void mi_strlcpy(char* dest, const char* src, size_t dest_size) {
  dest[0] = 0;
-  #pragma warning(suppress:4996)
  strncpy(dest, src, dest_size - 1);
  dest[dest_size - 1] = 0;
}

static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
-  #pragma warning(suppress:4996)
  strncat(dest, src, dest_size - 1);
  dest[dest_size - 1] = 0;
}

+static inline int mi_strnicmp(const char* s, const char* t, size_t n) {
+  if (n==0) return 0;
+  for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
+    if (toupper(*s) != toupper(*t)) break;
+  }
+  return (n==0 ? 0 : *s - *t);
+}
+
#if defined _WIN32
// On Windows use GetEnvironmentVariable instead of getenv to work
// reliably even when this is invoked before the C runtime is initialized.
// i.e. when `_mi_preloading() == true`.
// Note: on windows, environment names are not case sensitive.
-#include <Windows.h>
+#include <windows.h>
static bool mi_getenv(const char* name, char* result, size_t result_size) {
  result[0] = 0;
  size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
  return (len > 0 && len < result_size);
}
-#else
+#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0)
+// On POSIX systems use `environ` to access environment variables
+// even before the C runtime is initialized.
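
// Illustrative sketch (not part of the patch): a minimal standalone version
// of the `environ` scan that follows, assuming a POSIX platform (on macOS the
// pointer must instead be fetched via _NSGetEnviron(), and the real scan
// compares case-insensitively; the helper name is hypothetical):
#include <string.h>
extern char** environ;
static const char* example_env_lookup(const char* name) {
  size_t len = strlen(name);
  for (char** e = environ; *e != NULL; e++) {
    if (strncmp(*e, name, len) == 0 && (*e)[len] == '=') return *e + len + 1;
  }
  return NULL;  // not found
}
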
+#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>)
+#include <crt_externs.h>
+static char** mi_get_environ(void) {
+  return (*_NSGetEnviron());
+}
+#else
+extern char** environ;
+static char** mi_get_environ(void) {
+  return environ;
+}
+#endif
 static bool mi_getenv(const char* name, char* result, size_t result_size) {
+  if (name==NULL) return false;
+  const size_t len = strlen(name);
+  if (len == 0) return false;
+  char** env = mi_get_environ();
+  if (env == NULL) return false;
+  // compare up to 256 entries
+  for (int i = 0; i < 256 && env[i] != NULL; i++) {
+    const char* s = env[i];
+    if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive
+      // found it
+      mi_strlcpy(result, s + len + 1, result_size);
+      return true;
+    }
+  }
+  return false;
+}
+#else
+// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime
 static bool mi_getenv(const char* name, char* result, size_t result_size) {
+  // cannot call getenv() when still initializing the C runtime.
+  if (_mi_preloading()) return false;
   const char* s = getenv(name);
   if (s == NULL) {
-    // in unix environments we check the upper case name too.
+    // we check the upper case name too.
     char buf[64+1];
     size_t len = strlen(name);
     if (len >= sizeof(buf)) len = sizeof(buf) - 1;
@@ -434,11 +485,8 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
   }
 }
 #endif
-static void mi_option_init(mi_option_desc_t* desc) {
-  #ifndef _WIN32
-  // cannot call getenv() when still initializing the C runtime.
-  if (_mi_preloading()) return;
-  #endif
+
+static void mi_option_init(mi_option_desc_t* desc) {
   // Read option value from the environment
   char buf[64+1];
   mi_strlcpy(buf, "mimalloc_", sizeof(buf));
@@ -462,6 +510,14 @@ static void mi_option_init(mi_option_desc_t* desc) {
   else {
     char* end = buf;
     long value = strtol(buf, &end, 10);
+    if (desc->option == mi_option_reserve_os_memory) {
+      // this option is interpreted in KiB to prevent overflow of `long`
+      if (*end == 'K') { end++; }
+      else if (*end == 'M') { value *= KiB; end++; }
+      else if (*end == 'G') { value *= MiB; end++; }
+      else { value = (value + KiB - 1) / KiB; }
+      if (*end == 'B') { end++; }
+    }
     if (*end == 0) {
       desc->value = value;
       desc->init = INITIALIZED;
@@ -471,9 +527,9 @@ static void mi_option_init(mi_option_desc_t* desc) {
       desc->init = DEFAULTED;
     }
   }
+  mi_assert_internal(desc->init != UNINIT);
 }
- else {
+ else if (!_mi_preloading()) {
   desc->init = DEFAULTED;
 }
- mi_assert_internal(desc->init != UNINIT);
 }
diff --git a/src/os.c b/src/os.c
index f33cfbc3..e2c93d72 100644
--- a/src/os.c
+++ b/src/os.c
@@ -8,27 +8,51 @@ terms of the MIT license. A copy of the license can be found in the file
 #define _DEFAULT_SOURCE   // ensure mmap flags are defined
 #endif
+#if defined(__sun)
+// illumos provides the new mman.h API when any of these are defined;
+// otherwise it exposes the old API based on caddr_t, which predates the
+// void-pointer one. Stock Solaris provides only the former, so we chose to
+// discard those flags only here rather than project-wide.
+#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif #include "mimalloc.h" #include "mimalloc-internal.h" #include "mimalloc-atomic.h" #include <string.h> // strerror +#ifdef _MSC_VER +#pragma warning(disable:4996) // strerror +#endif + #if defined(_WIN32) -#include <Windows.h> +#include <windows.h> #elif defined(__wasi__) // stdlib.h is all we need, and has already been included in mimalloc.h #else #include <sys/mman.h> // mmap #include <unistd.h> // sysconf #if defined(__linux__) +#include <features.h> +#if defined(__GLIBC__) #include <linux/mman.h> // linux mmap flags +#else +#include <sys/mman.h> +#endif #endif #if defined(__APPLE__) +#include <TargetConditionals.h> +#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR #include <mach/vm_statistics.h> #endif #endif +#if defined(__HAIKU__) +#define madvise posix_madvise +#define MADV_DONTNEED POSIX_MADV_DONTNEED +#endif +#endif /* ----------------------------------------------------------- Initialization. @@ -90,6 +114,7 @@ size_t _mi_os_good_alloc_size(size_t size) { // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) // NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB) +// // We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. #include <winternl.h> typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); @@ -97,6 +122,17 @@ typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T* static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; +// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 +#if (_WIN32_WINNT < 0x601) // before Win7 +typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; +#endif +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(PPROCESSOR_NUMBER ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(PPROCESSOR_NUMBER Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; + static bool mi_win_enable_large_os_pages() { if (large_os_page_size > 0) return true; @@ -147,11 +183,20 @@ void _mi_os_init(void) { if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); FreeLibrary(hDll); } + // NtAllocateVirtualMemoryEx is used for huge page allocation hDll = LoadLibrary(TEXT("ntdll.dll")); if (hDll != NULL) { pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); FreeLibrary(hDll); } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + if (hDll != NULL) { + pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + FreeLibrary(hDll); + } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { mi_win_enable_large_os_pages(); } @@ -192,7 +237,6 @@ static bool
mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats if (was_committed) _mi_stat_decrease(&stats->committed, size); _mi_stat_decrease(&stats->reserved, size); if (err) { - #pragma warning(suppress:4996) _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size); return false; } @@ -236,15 +280,15 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { mi_assert_internal(!(large_only && !allow_large)); - static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; + static _Atomic(uintptr_t) large_page_try_ok; // = 0; void* p = NULL; if ((large_only || use_large_os_page(size, try_alignment)) && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { // large OS pages must always reserve and commit. @@ -253,7 +297,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, if (large_only) return p; // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { - mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations } } } @@ -262,7 +306,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, p = mi_win_virtual_allocx(addr, size, try_alignment, flags); } if (p == NULL) { - _mi_warning_message("unable to allocate memory: error code: %i, addr: %p, size: 0x%x, large only: %d, allow_large: %d\n", GetLastError(), addr, size, large_only, allow_large); + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, GetLastError(), addr, large_only, allow_large); } return p; } @@ -314,7 +358,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro int fd = -1; #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { - size_t n = _mi_bsr(try_alignment); + size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB flags |= MAP_ALIGNED(n); } @@ -330,14 +374,14 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro fd = VM_MAKE_TAG(os_tag); #endif if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static volatile _Atomic(uintptr_t) large_page_try_ok; // = 0; - uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + static _Atomic(uintptr_t) large_page_try_ok; // = 0; + uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); if (!large_only && try_ok > 0) { // If the OS is not configured for large OS pages, or the user does not have // enough permission, the `mmap` will always fail (but it might also fail for other reasons). 
// Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times // to avoid too many failing calls to mmap. - mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); } else { int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux @@ -377,7 +421,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #endif if (large_only) return p; if (p == NULL) { - mi_atomic_write(&large_page_try_ok, 10); // on error, don't try again for the next N allocations + mi_atomic_store_release(&large_page_try_ok, (uintptr_t)10); // on error, don't try again for the next N allocations } } } @@ -391,13 +435,26 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // though since properly aligned allocations will already use large pages if available // in that case -- in particular for our large regions (in `memory.c`). // However, some systems only allow THP if called with explicit `madvise`, so - // when large OS pages are enabled for mimalloc, we call `madvice` anyways. + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. if (allow_large && use_large_os_page(size, try_alignment)) { if (madvise(p, size, MADV_HUGEPAGE) == 0) { *is_large = true; // possibly }; } #endif + #if defined(__sun) + if (allow_large && use_large_os_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = large_os_page_size; + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + if (p == NULL) { + _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); } return p; } @@ -406,21 +463,22 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro // On 64-bit systems, we can do efficient aligned allocation by using // the 4TiB to 30TiB area to allocate them. 
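As an aside, the `large_page_try_ok` logic in the hunks above follows a simple backoff pattern: after a failed large-page allocation, the expensive attempt is skipped for the next N allocations. A minimal standalone sketch of that pattern (hypothetical names, plain C11 atomics instead of the `mi_atomic_*` wrappers):

#include <stdatomic.h>
#include <stdlib.h>

static _Atomic(size_t) large_try_ok;   // > 0 means: still backing off

// hypothetical stand-ins for the large-page and regular OS allocation paths
static void* alloc_large(size_t size)   { (void)size; return NULL; /* simulate failure */ }
static void* alloc_regular(size_t size) { return malloc(size); }

static void* alloc_with_backoff(size_t size) {
  size_t try_ok = atomic_load_explicit(&large_try_ok, memory_order_acquire);
  if (try_ok > 0) {
    // still backing off: count down; losing this CAS to another thread is benign
    atomic_compare_exchange_strong(&large_try_ok, &try_ok, try_ok - 1);
  }
  else {
    void* p = alloc_large(size);
    if (p != NULL) return p;
    // failure: don't try large pages again for the next 10 allocations
    atomic_store_explicit(&large_try_ok, 10, memory_order_release);
  }
  return alloc_regular(size);
}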
#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base; +static mi_decl_cache_align _Atomic(uintptr_t) aligned_base; // Return a 4MiB aligned address that is probably available static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - uintptr_t hint = mi_atomic_add(&aligned_base, size); + uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif - mi_atomic_cas_strong(&aligned_base, init, hint + size); - hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all + uintptr_t expected = hint + size; + mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); + hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; return (void*)hint; @@ -544,14 +602,18 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, OS API: alloc, free, alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, mi_stats_t* stats) { +void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { + UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); bool is_large = false; return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats) { +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { + UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (size == 0 || p == NULL) return; size = _mi_os_good_alloc_size(size); mi_os_mem_free(p, size, was_committed, stats); @@ -561,8 +623,9 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { _mi_os_free_ex(p, size, true, stats); } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) { + UNUSED(tld_stats); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); @@ -571,7 +634,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar allow_large = *large; *large = false; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), tld->stats); + return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); } @@ -628,11 +691,11 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) int err = 0; if (commit) { - _mi_stat_increase(&stats->committed, csize); + 
_mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit _mi_stat_counter_increase(&stats->commit_calls, 1); } else { - _mi_stat_decrease(&stats->committed, csize); + _mi_stat_decrease(&stats->committed, size); } #if defined(_WIN32) @@ -658,6 +721,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // for commit, just change the protection err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } + #if defined(MADV_FREE_REUSE) + while ((err = madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } + #endif } #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); @@ -671,16 +737,20 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ return (err == 0); } -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { + UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; bool is_zero; return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { +static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); } @@ -715,12 +785,19 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) if (p != start) return false; #else #if defined(MADV_FREE) - static int advice = MADV_FREE; - int err = madvise(start, csize, advice); - if (err != 0 && errno == EINVAL && advice == MADV_FREE) { - // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - advice = MADV_DONTNEED; - err = madvise(start, csize, advice); + #if defined(MADV_FREE_REUSABLE) + #define KK_MADV_FREE_INITIAL MADV_FREE_REUSABLE + #else + #define KK_MADV_FREE_INITIAL MADV_FREE + #endif + static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(KK_MADV_FREE_INITIAL); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == KK_MADV_FREE_INITIAL) { + // if MADV_FREE/MADV_FREE_REUSABLE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (uintptr_t)MADV_DONTNEED); + err = madvise(start, csize, MADV_DONTNEED); } #elif defined(__wasi__) int err = 0; @@ -740,7 +817,9 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. 
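The `mi_os_resetx` hunk above switches the madvise advice at most once per process: it starts from MADV_FREE (or MADV_FREE_REUSABLE on macOS) and falls back permanently to MADV_DONTNEED when the kernel reports EINVAL. A standalone sketch of just that fallback, assuming POSIX headers (`os_reset` is a hypothetical name, not mimalloc's API):

#include <errno.h>
#include <stdatomic.h>
#include <sys/mman.h>

#if !defined(MADV_FREE)
#define MADV_FREE MADV_DONTNEED   // older headers: both advices degenerate to one
#endif

static _Atomic(int) reset_advice = MADV_FREE;

static int os_reset(void* p, size_t size) {
  int advice = atomic_load_explicit(&reset_advice, memory_order_relaxed);
  int err;
  while ((err = madvise(p, size, advice)) != 0 && errno == EAGAIN) { errno = 0; }
  if (err != 0 && errno == EINVAL && advice == MADV_FREE) {
    // MADV_FREE not supported by this kernel: use MADV_DONTNEED from now on
    atomic_store_explicit(&reset_advice, MADV_DONTNEED, memory_order_release);
    err = madvise(p, size, MADV_DONTNEED);
  }
  return err;
}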
-bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { + UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (mi_option_is_enabled(mi_option_reset_decommits)) { return _mi_os_decommit(addr, size, stats); } @@ -749,9 +828,11 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { } } -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { +bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) + return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) } else { *is_zero = false; @@ -876,7 +957,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } -#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) +#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) #include <sys/syscall.h> #ifndef MPOL_PREFERRED #define MPOL_PREFERRED 1 #endif @@ -926,9 +1007,9 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { uintptr_t start = 0; uintptr_t end = 0; - uintptr_t expected; + uintptr_t huge_start = mi_atomic_load_relaxed(&mi_huge_start); do { - start = expected = mi_atomic_read_relaxed(&mi_huge_start); + start = huge_start; if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address @@ -939,7 +1020,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); - } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected)); + } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end)); if (total_size != NULL) *total_size = size; return (uint8_t*)start; @@ -1019,24 +1100,50 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 - #if (_WIN32_WINNT < 0x601) // before Win7 - typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; - WINBASEAPI VOID WINAPI GetCurrentProcessorNumberEx(_Out_ PPROCESSOR_NUMBER ProcNumber); - WINBASEAPI BOOL WINAPI GetNumaProcessorNodeEx(_In_ PPROCESSOR_NUMBER Processor, _Out_ PUSHORT NodeNumber); - #endif +#ifdef _WIN32 static size_t mi_os_numa_nodex() { - PROCESSOR_NUMBER pnum; USHORT numa_node = 0; - GetCurrentProcessorNumberEx(&pnum); - GetNumaProcessorNodeEx(&pnum,&numa_node); + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) numa_node = nnode; + } + else { + // Vista or earlier, use older API that is limited to 64 processors.
Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) numa_node = nnode; + } return numa_node; } static size_t mi_os_numa_node_countx(void) { ULONG numa_max = 0; GetNumaHighestNodeNumber(&numa_max); - return (numa_max + 1); + // find the highest node number that has actual processors assigned to it. Issue #282 + while(numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. + ULONGLONG mask; + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); } #elif defined(__linux__) #include <sys/syscall.h> // getcpu diff --git a/src/page-queue.c b/src/page-queue.c index ea213019..57e3d6a5 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -49,50 +49,6 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { Bins ----------------------------------------------------------- */ -// Bit scan reverse: return the index of the highest bit. -static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include <intrin.h> -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return (uint8_t)idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf> - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -// Bit scan reverse: return the index of the highest bit. -uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; -#if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); -#elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); -#else -# error "define bsr for non-32 or 64-bit platforms" -#endif -} - // Return the bin for a given field size. // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", @@ -125,7 +81,7 @@ extern inline uint8_t _mi_bin(size_t size) { #endif wsize--; // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); + uint8_t b = (uint8_t)mi_bsr(wsize); // note: wsize != 0 // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes // which each get an exact bin @@ -260,7 +216,7 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { heap->page_count--; page->next = NULL; page->prev = NULL; - // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } @@ -274,7 +230,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -341,7 +297,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue for (mi_page_t* page = append->first; page != NULL; page = page->next) { // inline `mi_page_set_heap` to avoid wrong assertion during absorption; // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. mi_atomic_store_release(&page->xheap, (uintptr_t)heap); // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. diff --git a/src/page.c b/src/page.c index 08aa88c7..4b7e9ffb 100644 --- a/src/page.c +++ b/src/page.c @@ -122,11 +122,11 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { - mi_thread_free_t tfree; mi_thread_free_t tfreex; mi_delayed_t old_delay; + mi_thread_free_t tfree; do { - tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS + tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { @@ -140,7 +140,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid break; // leave never-delayed flag set } } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); } /* ----------------------------------------------------------- @@ -154,13 +154,12 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid static void _mi_page_thread_free_collect(mi_page_t* page) { mi_block_t* head; - mi_thread_free_t tfree; mi_thread_free_t tfreex; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { - tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); + } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // return if the list is empty if (head == NULL) return; @@ -273,11 +272,9 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ void
_mi_heap_delayed_free(mi_heap_t* heap) { - // take over the list (note: no atomic exchange is it is often NULL) - mi_block_t* block; - do { - block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); - } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block)); + // take over the list (note: no atomic exchange since it is often NULL) + mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; // and free them all while(block != NULL) { @@ -286,11 +283,10 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. - mi_block_t* dfree; + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { - dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); mi_block_set_nextx(heap, block, dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); } block = next; } @@ -709,14 +705,17 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { - // in secure mode, we extend half the time to increase randomness + #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness + if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } - else { + else + #endif + { _mi_page_free_collect(page,false); } + if (mi_page_immediate_available(page)) { page->retire_expire = 0; return page; // fast path @@ -734,20 +733,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { ----------------------------------------------------------- */ static mi_deferred_free_fun* volatile deferred_free = NULL; -static volatile _Atomic(void*) deferred_arg; // = NULL +static _Atomic(void*) deferred_arg; // = NULL void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg)); + deferred_free(force, heap->tld->heartbeat, mi_atomic_load_ptr_relaxed(void,&deferred_arg)); heap->tld->recurse = false; } } void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; - mi_atomic_write_ptr(void,&deferred_arg, arg); + mi_atomic_store_ptr_release(void,&deferred_arg, arg); } @@ -792,7 +791,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` if (mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) { if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see ) - _mi_error_message(EOVERFLOW, "allocation request is too large (%zu b requested)\n", 
req_size); + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } else { @@ -816,6 +815,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept if (mi_unlikely(!mi_heap_is_initialized(heap))) { mi_thread_init(); // calls `_mi_heap_init` in turn heap = mi_get_default_heap(); + if (mi_unlikely(!mi_heap_is_initialized(heap))) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); @@ -833,7 +833,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept } if (mi_unlikely(page == NULL)) { // out of memory - _mi_error_message(ENOMEM, "cannot allocate memory (%zu bytes requested)\n", size); + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); return NULL; } diff --git a/src/random.c b/src/random.c index b3dbf4f8..b9485ea0 100644 --- a/src/random.c +++ b/src/random.c @@ -155,30 +155,40 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- To initialize a fresh random context we rely on the OS: -- Windows : BCryptGenRandom +- Windows : BCryptGenRandom (or RtlGenRandom) - osX,bsd,wasi: arc4random_buf - Linux : getrandom,/dev/urandom If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ #if defined(_WIN32) + +#if !defined(MI_USE_RTLGENRANDOM) +// We prefer BCryptGenRandom over RtlGenRandom #pragma comment (lib,"bcrypt.lib") #include <bcrypt.h> static bool os_random_buf(void* buf, size_t buf_len) { return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); } -/* -#define SystemFunction036 NTAPI SystemFunction036 -#include <NTSecAPI.h> -#undef SystemFunction036 -static bool os_random_buf(void* buf, size_t buf_len) { - RtlGenRandom(buf, (ULONG)buf_len); - return true; +#else +// Use (unofficial) RtlGenRandom +#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +#ifdef __cplusplus +extern "C" { +#endif +BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); +#ifdef __cplusplus } -*/ +#endif +static bool os_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} +#endif + #elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__wasi__) + defined(__sun) || defined(__wasi__) #include <stdlib.h> static bool os_random_buf(void* buf, size_t buf_len) { arc4random_buf(buf, buf_len); @@ -200,12 +210,12 @@ static bool os_random_buf(void* buf, size_t buf_len) { #ifndef GRND_NONBLOCK #define GRND_NONBLOCK (1) #endif - static volatile _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_read(&no_getrandom)==0) { + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); if (ret >= 0) return (buf_len == (size_t)ret); if (ret != ENOSYS) return false; - mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom + mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom } #endif int flags = O_RDONLY; @@ -234,7 +244,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { #endif #if defined(_WIN32)
-#include <Windows.h> +#include <windows.h> #elif defined(__APPLE__) #include <mach/mach_time.h> #else @@ -243,6 +253,7 @@ static bool os_random_buf(void* buf, size_t buf_len) { uintptr_t _os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random + #if defined(_WIN32) LARGE_INTEGER pcount; QueryPerformanceCounter(&pcount); diff --git a/src/region.c b/src/region.c index ae3a799a..663859c8 100644 --- a/src/region.c +++ b/src/region.c @@ -37,7 +37,7 @@ Possible issues: #include <string.h> // memset -#include "bitmap.inc.c" +#include "bitmap.h" // Internal raw OS interface size_t _mi_os_large_page_size(); @@ -50,8 +50,8 @@ bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // arena.c void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); @@ -77,7 +77,8 @@ typedef union mi_region_info_u { uintptr_t value; struct { bool valid; // initialized? - bool is_large; // allocated in fixed large/huge OS pages + bool is_large:1; // allocated in fixed large/huge OS pages + bool is_pinned:1; // pinned memory cannot be decommitted short numa_node; // the associated NUMA node (where -1 means no associated node) } x; } mi_region_info_t; @@ -86,21 +87,21 @@ typedef union mi_region_info_u { // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. typedef struct mem_region_s { - volatile _Atomic(uintptr_t) info; // mi_region_info_t.value - volatile _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena - uintptr_t padding; // round to 8 fields + _Atomic(uintptr_t) info; // mi_region_info_t.value + _Atomic(void*) start; // start of the memory area + mi_bitmap_field_t in_use; // bit per in-use block + mi_bitmap_field_t dirty; // track if non-zero per block + mi_bitmap_field_t commit; // track if committed per block + mi_bitmap_field_t reset; // track if reset per block + _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena + uintptr_t padding; // round to 8 fields } mem_region_t; // The region map static mem_region_t regions[MI_REGION_MAX]; // Allocated regions -static volatile _Atomic(uintptr_t) regions_count; // = 0; +static _Atomic(uintptr_t) regions_count; // = 0; /* ---------------------------------------------------------------------------- @@ -123,9 +124,9 @@ static size_t mi_good_commit_size(size_t size) { // Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { if (p==NULL) return false; - size_t count = mi_atomic_read_relaxed(&regions_count); + size_t count = mi_atomic_load_relaxed(&regions_count); for (size_t i = 0; i < count; i++) { - uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t,&regions[i].start); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, &regions[i].start); if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; } return false; @@ -133,7 +134,7 @@ bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = mi_atomic_read_ptr(uint8_t,&region->start); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); mi_assert_internal(start != NULL); return (start + (bit_idx * MI_SEGMENT_SIZE)); } @@ -171,22 +172,23 @@ static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // not out of regions yet? - if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false; + if (mi_atomic_load_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false; // try to allocate a fresh region from the OS bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); bool region_large = (commit && allow_large); bool is_zero = false; + bool is_pinned = false; size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_pinned, &is_zero, &arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); mi_assert_internal(!region_large || region_commit); // claim a fresh slot - const uintptr_t idx = mi_atomic_increment(&regions_count); + const uintptr_t idx = mi_atomic_increment_acq_rel(&regions_count); if (idx >= MI_REGION_MAX) { - mi_atomic_decrement(&regions_count); + mi_atomic_decrement_acq_rel(&regions_count); _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); return false; @@ -195,21 +197,22 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, // allocated, initialize and claim the initial blocks mem_region_t* r = &regions[idx]; r->arena_memid = arena_memid; - mi_atomic_write(&r->in_use, 0); - mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_write(&r->reset, 0); + mi_atomic_store_release(&r->in_use, (uintptr_t)0); + mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); + mi_atomic_store_release(&r->commit, (region_commit ?
MI_BITMAP_FIELD_FULL : 0)); + mi_atomic_store_release(&r->reset, (uintptr_t)0); *bit_idx = 0; - mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_write_ptr(uint8_t*,&r->start, start); + _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); + mi_atomic_store_ptr_release(void,&r->start, start); // and share it mi_region_info_t info; info.value = 0; // initialize the full union to zero info.x.valid = true; info.x.is_large = region_large; + info.x.is_pinned = is_pinned; info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_write(&r->info, info.value); // now make it available to others + mi_atomic_store_release(&r->info, info.value); // now make it available to others *region = r; return true; } @@ -221,7 +224,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { // initialized at all? mi_region_info_t info; - info.value = mi_atomic_read_relaxed(&region->info); + info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); if (info.value==0) return false; // numa correct @@ -240,7 +243,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) { // try all regions for a free slot - const size_t count = mi_atomic_read(&regions_count); + const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though for (size_t visited = 0; visited < count; visited++, idx++) { if (idx >= count) idx = 0; // wrap around @@ -248,7 +251,7 @@ static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, // if this region suits our demand (numa node matches, large OS page matches) if (mi_region_is_suitable(r, numa_node, allow_large)) { // then try to atomically claim a segment(s) in this region - if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { + if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { tld->region_idx = idx; // remember the last found position *region = r; return true; @@ -259,16 +262,16 @@ static bool mi_region_try_claim, } -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); mem_region_t* region; mi_bitmap_index_t bit_idx; const int numa_node = (_mi_os_numa_node_count() <= 1 ?
-1 : _mi_os_numa_node(tld)); // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) { + if (!mi_region_try_claim(numa_node, blocks, *large, &region, &bit_idx, tld)) { // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) { + if (!mi_region_try_alloc_os(blocks, *commit, *large, &region, &bit_idx, tld)) { // out of regions or memory return NULL; } @@ -277,41 +280,46 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // ------------------------------------------------ // found a region and claimed `blocks` at `bit_idx`, initialize them now mi_assert_internal(region != NULL); - mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx)); + mi_assert_internal(_mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx)); mi_region_info_t info; - info.value = mi_atomic_read(&region->info); - uint8_t* start = mi_atomic_read_ptr(uint8_t,&region->start); - mi_assert_internal(!(info.x.is_large && !*is_large)); + info.value = mi_atomic_load_acquire(&region->info); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&region->start); + mi_assert_internal(!(info.x.is_large && !*large)); mi_assert_internal(start != NULL); - *is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL); - *is_large = info.x.is_large; - *memid = mi_memid_create(region, bit_idx); + *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL); + *large = info.x.is_large; + *is_pinned = info.x.is_pinned; + *memid = mi_memid_create(region, bit_idx); void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); // commit if (*commit) { // ensure commit bool any_uncommitted; - mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted); + _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted); if (any_uncommitted) { - mi_assert_internal(!info.x.is_large); - bool commit_zero; - _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld); - if (commit_zero) *is_zero = true; + mi_assert_internal(!info.x.is_large && !info.x.is_pinned); + bool commit_zero = false; + if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { + // failed to commit!
unclaim and return + mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx); + return NULL; + } + if (commit_zero) *is_zero = true; } } else { // no need to commit, but check if already fully committed - *commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx); + *commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx); } - mi_assert_internal(!*commit || mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx)); + mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx)); // unreset reset blocks - if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { + if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { // some blocks are still reset - mi_assert_internal(!info.x.is_large); + mi_assert_internal(!info.x.is_large && !info.x.is_pinned); mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed @@ -320,7 +328,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo if (reset_zero) *is_zero = true; } } - mi_assert_internal(!mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)); + mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)); #if (MI_DEBUG>=2) if (*commit) { ((uint8_t*)p)[0] = 0; } @@ -338,12 +346,13 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bo // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); *memid = 0; *is_zero = false; + *is_pinned = false; bool default_large = false; if (large==NULL) large = &default_large; // ensure `large != NULL` if (size == 0) return NULL; @@ -354,14 +363,14 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l size_t arena_memid; const size_t blocks = mi_region_block_count(size); if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { - p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld); + p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); if (p == NULL) { _mi_warning_message("unable to allocate from region: size %zu\n", size); } } if (p == NULL) { // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld); + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld); *memid = mi_memid_create_from_arena(arena_memid); } @@ -400,7 +409,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re const size_t blocks = mi_region_block_count(size); mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); mi_region_info_t info; - info.value = mi_atomic_read(&region->info); + info.value = mi_atomic_load_acquire(&region->info); mi_assert_internal(info.value != 0); void* blocks_start = mi_region_blocks_start(region, bit_idx); mi_assert_internal(blocks_start == p); // not a pointer in our area?
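For orientation, the `_mi_bitmap_claim`/`mi_bitmap_unclaim` calls in these hunks operate on `uintptr_t` bitmap fields with one bit per block. A simplified single-field sketch of their semantics (the real signatures in `bitmap.c` differ; this is only illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static inline uintptr_t bit_mask(size_t count, size_t bit_idx) {
  const uintptr_t bits = (count >= sizeof(uintptr_t)*8 ? ~(uintptr_t)0 : (((uintptr_t)1 << count) - 1));
  return (bits << bit_idx);
}

// atomically set `count` bits at `bit_idx`; report whether any bit was 0 before
static bool bitmap_claim(_Atomic(uintptr_t)* field, size_t count, size_t bit_idx, bool* any_zero) {
  const uintptr_t mask = bit_mask(count, bit_idx);
  const uintptr_t prev = atomic_fetch_or_explicit(field, mask, memory_order_acq_rel);
  if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
  return ((prev & mask) == 0);   // true if all bits were previously clear
}

// atomically clear the same bits again
static void bitmap_unclaim(_Atomic(uintptr_t)* field, size_t count, size_t bit_idx) {
  atomic_fetch_and_explicit(field, ~bit_mask(count, bit_idx), memory_order_acq_rel);
}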
@@ -409,21 +418,21 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re // committed? if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL); + _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL); } if (any_reset) { // set the is_reset bits if any pages were reset - mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL); + _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL); } // reset the blocks to reduce the working set. - if (!info.x.is_large && mi_option_is_enabled(mi_option_segment_reset) + if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) && (mi_option_is_enabled(mi_option_eager_commit) || mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead { bool any_unreset; - mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset); + _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset); if (any_unreset) { _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); @@ -442,23 +451,21 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re -----------------------------------------------------------------------------*/ void _mi_mem_collect(mi_os_tld_t* tld) { // free every region that has no segments in use. - uintptr_t rcount = mi_atomic_read_relaxed(&regions_count); + uintptr_t rcount = mi_atomic_load_relaxed(&regions_count); for (size_t i = 0; i < rcount; i++) { mem_region_t* region = &regions[i]; - if (mi_atomic_read_relaxed(&region->info) != 0) { + if (mi_atomic_load_relaxed(&region->info) != 0) { // if no segments used, try to claim the whole region - uintptr_t m; - do { - m = mi_atomic_read_relaxed(&region->in_use); - } while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 )); + uintptr_t m = mi_atomic_load_relaxed(&region->in_use); + while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; if (m == 0) { // on success, free the whole region - uint8_t* start = mi_atomic_read_ptr(uint8_t,&regions[i].start); - size_t arena_memid = mi_atomic_read_relaxed(&regions[i].arena_memid); - uintptr_t commit = mi_atomic_read_relaxed(&regions[i].commit); + uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start); + size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid); + uintptr_t commit = mi_atomic_load_relaxed(&regions[i].commit); memset(&regions[i], 0, sizeof(mem_region_t)); // and release the whole region - mi_atomic_write(&region->info, 0); + mi_atomic_store_release(&region->info, (uintptr_t)0); if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { _mi_abandoned_await_readers(); // ensure no pending reads _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); diff --git a/src/segment.c b/src/segment.c index 9190034a..fb8e0fe1 100644 --- a/src/segment.c +++ b/src/segment.c @@ -198,26 +198,32 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* // add/remove guard pages if (MI_SECURE != 0) { // in secure mode, we set up a protected page in between the segment info and the page data - const size_t os_page_size = _mi_os_page_size(); - mi_assert_internal((segment->segment_info_size - os_page_size) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); - mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) %
os_page_size == 0); - mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); + const size_t os_psize = _mi_os_page_size(); + mi_assert_internal((segment->segment_info_size - os_psize) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); + mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_psize == 0); + mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_psize, os_psize, protect); if (MI_SECURE <= 1 || segment->capacity == 1) { // and protect the last (or only) page too mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= MI_PAGE_LARGE); - uint8_t* start = (uint8_t*)segment + segment->segment_size - os_page_size; + uint8_t* start = (uint8_t*)segment + segment->segment_size - os_psize; if (protect && !segment->mem_is_committed) { - // ensure secure page is committed - _mi_mem_commit(start, os_page_size, NULL, tld); + if (protect) { + // ensure secure page is committed + if (_mi_mem_commit(start, os_psize, NULL, tld)) { // if this fails that is ok (as it is an unaccessible page) + mi_segment_protect_range(start, os_psize, protect); + } + } + } + else { + mi_segment_protect_range(start, os_psize, protect); } - mi_segment_protect_range(start, os_page_size, protect); } else { // or protect every page const size_t page_size = mi_segment_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { if (segment->pages[i].is_committed) { - mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size, protect); + mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_psize, os_psize, protect); } } } @@ -231,7 +237,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { mi_assert_internal(page->is_committed); if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; + if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; size_t psize; void* start = mi_segment_raw_page_start(segment, page, &psize); page->is_reset = true; @@ -240,19 +246,23 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m if (reset_size > 0) _mi_mem_reset(start, reset_size, tld->os); } -static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) +static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { mi_assert_internal(page->is_reset); mi_assert_internal(page->is_committed); - mi_assert_internal(!segment->mem_is_fixed); - if (segment->mem_is_fixed || !page->is_committed || !page->is_reset) return; + mi_assert_internal(!segment->mem_is_pinned); + if (segment->mem_is_pinned || !page->is_committed || !page->is_reset) return true; page->is_reset = false; size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? 
psize : size); bool is_zero = false; - if (unreset_size > 0) _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); + bool ok = true; + if (unreset_size > 0) { + ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); + } if (is_zero) page->is_zero_init = true; + return ok; } @@ -280,7 +290,7 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen mi_assert_expensive(!mi_pages_reset_contains(page, tld)); mi_assert_internal(_mi_page_segment(page)==segment); if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; + if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; if (mi_option_get(mi_option_reset_delay) == 0) { // reset immediately? @@ -320,7 +330,7 @@ static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { } static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) { - if (segment->mem_is_fixed) return; // never reset in huge OS pages + if (segment->mem_is_pinned) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use && page->is_committed && !page->is_reset) { @@ -375,11 +385,13 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ psize -= segment->segment_info_size; } - if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { - // secure == 1: the last page has an os guard page at the end - // secure > 1: every page has an os guard page +#if (MI_SECURE > 1) // every page has an os guard page + psize -= _mi_os_page_size(); +#elif (MI_SECURE==1) // the last page has an os guard page at the end + if (page->segment_idx == segment->capacity - 1) { psize -= _mi_os_page_size(); } +#endif if (page_size != NULL) *page_size = psize; mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); @@ -428,7 +440,7 @@ static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size guardsize = page_size; required = _mi_align_up(required, page_size); } -; + if (info_size != NULL) *info_size = isize; if (pre_size != NULL) *pre_size = isize + guardsize; return (required==0 ? 
MI_SEGMENT_SIZE : _mi_align_up( required + isize + 2*guardsize, MI_PAGE_HUGE_ALIGN) ); @@ -454,7 +466,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { - mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!segment->mem_is_pinned); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } @@ -468,7 +480,6 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -584,7 +595,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ else { if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!segment->mem_is_pinned); mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs } // different page kinds; unreset any reset pages, and unprotect @@ -603,8 +614,11 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // ensure the initial info is committed if (segment->capacity < capacity) { bool commit_zero = false; - _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); + bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; + if (!ok) { + return NULL; + } } } } @@ -612,10 +626,12 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate the segment from the OS size_t memid; bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); + bool is_pinned = false; + segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { // ensure the initial info is committed + mi_assert_internal(!mem_large && !is_pinned); bool commit_zero = false; bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; @@ -626,12 +642,13 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } } segment->memid = memid; - segment->mem_is_fixed = mem_large; - segment->mem_is_committed = commit; + segment->mem_is_pinned = (mem_large || is_pinned); + segment->mem_is_committed = commit; mi_segments_track_size((long)segment_size, tld); } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - + mi_assert_internal(segment->mem_is_pinned ? 
segment->mem_is_committed : true); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan if (!pages_still_good) { // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); @@ -715,7 +732,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg mi_pages_reset_remove(page, tld); // check commit if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!segment->mem_is_pinned); mi_assert_internal(!page->is_reset); size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); @@ -732,7 +749,13 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg segment->used++; // check reset if (page->is_reset) { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + mi_assert_internal(!segment->mem_is_pinned); + bool ok = mi_page_unreset(segment, page, 0, tld); + if (!ok) { + page->segment_in_use = false; + segment->used--; + return false; + } } mi_assert_internal(page->segment_in_use); mi_assert_internal(segment->used <= segment->capacity); @@ -791,7 +814,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a mi_pages_reset_add(segment, page, tld); } - page->capacity = 0; // after reset there can be zero'd now + page->capacity = 0; // after reset these can be zero'd now page->reserved = 0; } @@ -867,84 +890,97 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se // This is a list of visited abandoned pages that were full at the time. // this list migrates to `abandoned` when that becomes NULL. The use of // this list reduces contention and the rate at which segments are visited. -static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL +static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL // The abandoned page list (tagged as it supports pop) -static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL +static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL + +// Maintain these for debug purposes (these counts may be a bit off) +static mi_decl_cache_align _Atomic(uintptr_t) abandoned_count; +static mi_decl_cache_align _Atomic(uintptr_t) abandoned_visited_count; // We also maintain a count of current readers of the abandoned list // in order to prevent resetting/decommitting segment memory if it might // still be read. 
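Aside: the reader count described in the comment above is what makes the lock-free pop of this list safe against concurrent decommit. A minimal standalone C11 sketch of the pattern (illustrative only: `node_t`, `list_pop`, and `list_await_readers` are invented names, not mimalloc's API, and the ABA tag used by the real `abandoned` list is elided here):

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>
#include <sched.h>  // sched_yield, standing in for mi_atomic_yield

typedef struct node_s { struct node_s* next; } node_t;

static _Atomic(node_t*)   list_head;     // lock-free list head
static _Atomic(uintptr_t) list_readers;  // readers that may be dereferencing a node

// Pop: register as a reader before touching `node->next`, so a concurrent
// freer that honors `list_await_readers` cannot decommit the node under us.
static node_t* list_pop(void) {
  atomic_fetch_add_explicit(&list_readers, 1, memory_order_relaxed);
  node_t* node = atomic_load_explicit(&list_head, memory_order_acquire);
  while (node != NULL &&
         !atomic_compare_exchange_weak_explicit(&list_head, &node, node->next,
                                                memory_order_acq_rel, memory_order_acquire)) {
    // a failed CAS reloads `node` with the current head and retries
  }
  atomic_fetch_sub_explicit(&list_readers, 1, memory_order_release);
  return node;
}

// Freer side (compare _mi_abandoned_await_readers): spin until no pop can
// still be reading the `next` field of a node that was on the list.
static void list_await_readers(void) {
  while (atomic_load_explicit(&list_readers, memory_order_acquire) != 0) {
    sched_yield();
  }
}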
-static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0 +static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = 0 // Push on the visited list static void mi_abandoned_visited_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); - mi_segment_t* anext; + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); do { - anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited); - segment->abandoned_next = anext; - } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext)); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); + } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); + mi_atomic_increment_relaxed(&abandoned_visited_count); } // Move the visited list to the abandoned list. static bool mi_abandoned_visited_revisit(void) { // quick check if the visited list is empty - if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false; + if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; // grab the whole visited list - mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL); + mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); if (first == NULL) return false; // first try to swap directly if the abandoned list happens to be NULL - const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); mi_tagged_segment_t afirst; + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); if (mi_tagged_segment_ptr(ts)==NULL) { + uintptr_t count = mi_atomic_load_relaxed(&abandoned_visited_count); afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true; + if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) { + mi_atomic_add_relaxed(&abandoned_count, count); + mi_atomic_sub_relaxed(&abandoned_visited_count, count); + return true; + } } // find the last element of the visited list: O(n) mi_segment_t* last = first; - while (last->abandoned_next != NULL) { - last = last->abandoned_next; + mi_segment_t* next; + while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { + last = next; } // and atomically prepend to the abandoned list // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext; + mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); + uintptr_t count; do { - anext = mi_atomic_read_relaxed(&abandoned); - last->abandoned_next = mi_tagged_segment_ptr(anext); + count = mi_atomic_load_relaxed(&abandoned_visited_count); + mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak(&abandoned, afirst, anext)); + } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); + mi_atomic_add_relaxed(&abandoned_count, count); + mi_atomic_sub_relaxed(&abandoned_visited_count, count); return true; } // Push on the abandoned list. 
static void mi_abandoned_push(mi_segment_t* segment) { mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->used > 0); - mi_tagged_segment_t ts; mi_tagged_segment_t next; + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); do { - ts = mi_atomic_read_relaxed(&abandoned); - segment->abandoned_next = mi_tagged_segment_ptr(ts); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak(&abandoned, next, ts)); + } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); + mi_atomic_increment_relaxed(&abandoned_count); } // Wait until there are no more pending reads on segments that used to be in the abandoned list void _mi_abandoned_await_readers(void) { uintptr_t n; do { - n = mi_atomic_read(&abandoned_readers); + n = mi_atomic_load_acquire(&abandoned_readers); if (n != 0) mi_atomic_yield(); } while (n != 0); } @@ -953,7 +989,7 @@ void _mi_abandoned_await_readers(void) { static mi_segment_t* mi_abandoned_pop(void) { mi_segment_t* segment; // Check efficiently if it is empty (or if the visited list needs to be moved) - mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned); + mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); if (mi_likely(segment == NULL)) { if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL @@ -964,19 +1000,21 @@ static mi_segment_t* mi_abandoned_pop(void) { // Do a pop. We use a reader count to prevent // a segment from being decommitted while a read is still pending, // and a tagged pointer to prevent A-B-A link corruption.
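Aside: the tagged pointer mentioned in this comment packs a small counter into the alignment bits of the segment pointer, so that an A-B-A transition of the list head (pop of A, push of B, re-push of A) still changes the compared word and makes a stale CAS fail. A sketch of the packing under the same alignment assumption the code relies on (segment pointers are aligned to a large power of two); `SEG_ALIGN`, `tagged_ptr`, and `tagged_make` are illustrative names:

#include <assert.h>
#include <stdint.h>

#define SEG_ALIGN ((uintptr_t)1 << 22)  // e.g. 4 MiB alignment (illustrative)
#define TAG_MASK  (SEG_ALIGN - 1)       // the low bits are free for a tag

typedef uintptr_t tagged_ptr_t;

// Extract the real pointer by masking off the tag bits.
static void* tagged_ptr(tagged_ptr_t ts) {
  return (void*)(ts & ~TAG_MASK);
}

// Combine a pointer with a tag that is one more than the previous tag,
// wrapping within the mask; every successful update changes the tag.
static tagged_ptr_t tagged_make(void* p, tagged_ptr_t prev) {
  assert(((uintptr_t)p & TAG_MASK) == 0);  // requires aligned pointers
  uintptr_t tag = ((prev & TAG_MASK) + 1) & TAG_MASK;
  return ((uintptr_t)p | tag);
}

A CAS on the list head then compares the whole uintptr_t, so a racing pop observes a changed tag even when the same segment pointer reappears, which is what `mi_tagged_segment`/`mi_tagged_segment_ptr` provide for the `abandoned` list in the code below.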
- // (this is called from `memory.c:_mi_mem_free` for example) - mi_atomic_increment(&abandoned_readers); // ensure no segment gets decommitted + // (this is called from `region.c:_mi_mem_free` for example) + mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted mi_tagged_segment_t next = 0; + ts = mi_atomic_load_acquire(&abandoned); do { - ts = mi_atomic_read_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); if (segment != NULL) { - next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted + mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); + next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted } - } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts)); - mi_atomic_decrement(&abandoned_readers); // release reader lock + } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); + mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock if (segment != NULL) { - segment->abandoned_next = NULL; + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); + mi_atomic_decrement_relaxed(&abandoned_count); } return segment; } @@ -988,7 +1026,7 @@ static mi_segment_t* mi_abandoned_pop(void) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed @@ -1001,8 +1039,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; - segment->abandoned_next = NULL; segment->abandoned_visits = 0; + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); mi_abandoned_push(segment); } @@ -1066,7 +1104,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool // Reclaim a segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_internal(segment->abandoned_next == NULL); + mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } segment->thread_id = _mi_thread_id(); @@ -1283,28 +1321,27 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block // huge page segments are always abandoned and can be freed immediately by any thread mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); mi_assert_internal(segment == _mi_page_segment(page)); - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id)==0); // claim it and free mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. 
// paranoia: if this is the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + uintptr_t expected_tid = 0; + if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) { mi_block_set_next(page, block, page->free); page->free = block; page->used--; page->is_zero = false; mi_assert(page->used == 0); mi_tld_t* tld = heap->tld; - const size_t bsize = mi_page_usable_block_size(page); - if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&tld->stats.giant, bsize); - } - else { - _mi_stat_decrease(&tld->stats.huge, bsize); - } mi_segments_track_size((long)segment->segment_size, &tld->segments); _mi_segment_page_free(page, true, &tld->segments); } +#if (MI_DEBUG!=0) + else { + mi_assert_internal(false); + } +#endif } /* ----------------------------------------------------------- diff --git a/src/static.c b/src/static.c index bf86166d..346aced1 100644 --- a/src/static.c +++ b/src/static.c @@ -4,7 +4,14 @@ This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE +#endif +#if defined(__sun) +// same remarks as os.c for the static's context. +#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif #include "mimalloc.h" #include "mimalloc-internal.h" @@ -16,6 +23,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "stats.c" #include "random.c" #include "os.c" +#include "bitmap.c" #include "arena.c" #include "region.c" #include "segment.c" diff --git a/src/stats.c b/src/stats.c index 478f8229..091ad173 100644 --- a/src/stats.c +++ b/src/stats.c @@ -11,6 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file #include <stdio.h> // fputs, stderr #include <string.h> // memset +#if defined(_MSC_VER) && (_MSC_VER < 1920) +#pragma warning(disable:4204) // non-constant aggregate initializer +#endif /* ----------------------------------------------------------- Statistics operations @@ -26,13 +29,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - mi_atomic_addi64(&stat->current,amount); - if (stat->current > stat->peak) stat->peak = stat->current; // racing.. 
it's ok + int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { - mi_atomic_addi64(&stat->allocated,amount); + mi_atomic_addi64_relaxed(&stat->allocated,amount); } else { - mi_atomic_addi64(&stat->freed, -amount); + mi_atomic_addi64_relaxed(&stat->freed, -amount); } } else { @@ -50,8 +53,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_addi64( &stat->count, 1 ); - mi_atomic_addi64( &stat->total, (int64_t)amount ); + mi_atomic_addi64_relaxed( &stat->count, 1 ); + mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -70,17 +73,18 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_addi64( &stat->allocated, src->allocated * unit); - mi_atomic_addi64( &stat->current, src->current * unit); - mi_atomic_addi64( &stat->freed, src->freed * unit); - // peak scores do not work across threads.. - mi_atomic_addi64( &stat->peak, src->peak * unit); + if (src->allocated==0 && src->freed==0) return; + mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); + mi_atomic_addi64_relaxed( &stat->current, src->current * unit); + mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); + // peak scores do not work across threads.. + mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_addi64( &stat->total, src->total * unit); - mi_atomic_addi64( &stat->count, src->count * unit); + mi_atomic_addi64_relaxed( &stat->total, src->total * unit); + mi_atomic_addi64_relaxed( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge @@ -99,6 +103,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->malloc, &src->malloc, 1); mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); + mi_stat_add(&stats->normal, &src->normal, 1); mi_stat_add(&stats->huge, &src->huge, 1); mi_stat_add(&stats->giant, &src->giant, 1); @@ -108,12 +113,13 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); + mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1); #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { - if (src->normal[i].allocated > 0 || src->normal[i].freed > 0) { - mi_stat_add(&stats->normal[i], &src->normal[i], 1); + if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) { + mi_stat_add(&stats->normal_bins[i], &src->normal_bins[i], 1); } } #endif @@ -145,7 +151,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* const int64_t tens = (n / (divider/10)); const long whole = (long)(tens/10); const long frac1 = (long)(tens%10); - snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); + snprintf(buf, len, "%ld.%ld %s%s", whole, (frac1 < 0 ? 
-frac1 : frac1), magnitude, suffix); } _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); } @@ -166,6 +172,7 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t mi_print_amount(stat->peak, unit, out, arg); mi_print_amount(stat->allocated, unit, out, arg); mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(stat->current, unit, out, arg); mi_print_amount(unit, 1, out, arg); mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) @@ -177,6 +184,7 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t mi_print_amount(stat->peak, -1, out, arg); mi_print_amount(stat->allocated, -1, out, arg); mi_print_amount(stat->freed, -1, out, arg); + mi_print_amount(stat->current, -1, out, arg); if (unit==-1) { _mi_fprintf(out, arg, "%22s", ""); } @@ -192,6 +200,8 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t else { mi_print_amount(stat->peak, 1, out, arg); mi_print_amount(stat->allocated, 1, out, arg); + _mi_fprintf(out, arg, "%11s", " "); // no freed + mi_print_amount(stat->current, 1, out, arg); _mi_fprintf(out, arg, "\n"); } } @@ -211,24 +221,21 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); + _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); } #if MI_STAT>1 -static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { +static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) { bool found = false; char buf[64]; for (size_t i = 0; i <= max; i++) { if (bins[i].allocated > 0) { found = true; int64_t unit = _mi_bin_size((uint8_t)i); - snprintf(buf, 64, "%s %3zu", fmt, i); - mi_stat_add(all, &bins[i], unit); + snprintf(buf, 64, "%s %3lu", fmt, (long)i); mi_stat_print(&bins[i], buf, unit, out, arg); } } - //snprintf(buf, 64, "%s all", fmt); - //mi_stat_print(all, buf, 1); if (found) { _mi_fprintf(out, arg, "\n"); mi_print_header(out, arg); @@ -272,31 +279,34 @@ static void mi_buffered_out(const char* msg, void* arg) { // Print statistics //------------------------------------------------------------ -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); -static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out0, void* arg0) mi_attr_noexcept { +static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered char buf[256]; - buffered_t buffer = { out0, arg0, buf, 0, 255 }; + buffered_t buffer = { out0, arg0, NULL, 0, 255 }; + buffer.buf = buf; mi_output_fun* out = &mi_buffered_out; void* arg = &buffer; // and print using that mi_print_header(out,arg); #if MI_STAT>1 - mi_stat_count_t normal = { 0,0,0,0 }; - mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out,arg); - mi_stat_print(&normal, "normal", 1, out, 
arg); + mi_stats_print_bins(stats->normal_bins, MI_BIN_HUGE, "normal",out,arg); + #endif + #if MI_STAT + mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg); mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; - mi_stat_add(&total, &normal, 1); + mi_stat_add(&total, &stats->normal, 1); mi_stat_add(&total, &stats->huge, 1); mi_stat_add(&total, &stats->giant, 1); mi_stat_print(&total, "total", 1, out, arg); - _mi_fprintf(out, arg, "malloc requested: "); - mi_print_amount(stats->malloc.allocated, 1, out, arg); - _mi_fprintf(out, arg, "\n\n"); + #endif + #if MI_STAT>1 + mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); + _mi_fprintf(out, arg, "\n"); #endif mi_stat_print(&stats->reserved, "reserved", 1, out, arg); mi_stat_print(&stats->committed, "committed", 1, out, arg); @@ -314,25 +324,28 @@ static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - if (elapsed > 0) _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - + + mi_msecs_t elapsed; mi_msecs_t user_time; mi_msecs_t sys_time; + size_t current_rss; size_t peak_rss; - size_t page_faults; - size_t page_reclaim; + size_t current_commit; size_t peak_commit; - mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); + size_t page_faults; + mi_stat_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); + _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { - _mi_fprintf(out, arg, ", commit charge: "); + _mi_fprintf(out, arg, ", commit: "); mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } _mi_fprintf(out, arg, "\n"); } -static mi_msecs_t mi_time_start; // = 0 +static mi_msecs_t mi_process_start; // = 0 static mi_stats_t* mi_stats_get_default(void) { mi_heap_t* heap = mi_heap_get_default(); @@ -350,7 +363,7 @@ void mi_stats_reset(void) mi_attr_noexcept { mi_stats_t* stats = mi_stats_get_default(); if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); - mi_time_start = _mi_clock_start(); + if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; } void mi_stats_merge(void) mi_attr_noexcept { @@ -362,9 +375,8 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_time_start); 
mi_stats_merge_from(mi_stats_get_default()); - _mi_stats_print(&_mi_stats_main, elapsed, out, arg); + _mi_stats_print(&_mi_stats_main, out, arg); } void mi_stats_print(void* out) mi_attr_noexcept { @@ -373,8 +385,7 @@ void mi_stats_print(void* out) mi_attr_noexcept { } void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_msecs_t elapsed = _mi_clock_end(mi_time_start); - _mi_stats_print(mi_stats_get_default(), elapsed, out, arg); + _mi_stats_print(mi_stats_get_default(), out, arg); } @@ -382,7 +393,7 @@ void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { // Basic timer for convenience; use milli-seconds to avoid doubles // ---------------------------------------------------------------- #ifdef _WIN32 -#include <Windows.h> +#include <windows.h> static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { static LARGE_INTEGER mfreq; // = 0 if (mfreq.QuadPart == 0LL) { @@ -437,7 +448,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { // -------------------------------------------------------- #if defined(_WIN32) -#include <Windows.h> +#include <windows.h> #include <psapi.h> #pragma comment(lib,"psapi.lib") @@ -448,7 +459,10 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds return msecs; } -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { + +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *elapsed = _mi_clock_end(mi_process_start); FILETIME ct; FILETIME ut; FILETIME st; @@ -456,16 +470,16 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); *utime = filetime_msecs(&ut); *stime = filetime_msecs(&st); - PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *peak_rss = (size_t)info.PeakWorkingSetSize; - *page_faults = (size_t)info.PageFaultCount; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_reclaim = 0; + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = (size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; } -#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) +#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__) #include <stdio.h> #include <unistd.h> #include <sys/resource.h> @@ -474,23 +488,48 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r #include <mach/mach.h> #endif +#if defined(__HAIKU__) +#include <kernel/OS.h> +#endif + static mi_msecs_t timeval_secs(const struct timeval* tv) { return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *elapsed = _mi_clock_end(mi_process_start); struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); -#if defined(__APPLE__) && defined(__MACH__) - *peak_rss = rusage.ru_maxrss; -#else - *peak_rss = 
rusage.ru_maxrss * 1024; -#endif - *page_faults = rusage.ru_majflt; - *page_reclaim = rusage.ru_minflt; - *peak_commit = 0; *utime = timeval_secs(&rusage.ru_utime); *stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + *page_faults = rusage.ru_majflt; +#endif + // estimate commit using our stats + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *current_rss = *current_commit; // estimate +#if defined(__HAIKU__) + // Haiku does not have (yet?) a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + *peak_rss += mem.ram_size; + } +#elif defined(__APPLE__) && defined(__MACH__) + *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + *current_rss = (size_t)info.resident_size; + } +#else + *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB +#endif } #else @@ -499,12 +538,38 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r #pragma message("define a way to get process info") #endif -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { - *peak_rss = 0; +static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *elapsed = _mi_clock_end(mi_process_start); + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *peak_rss = *peak_commit; + *current_rss = *current_commit; *page_faults = 0; - *page_reclaim = 0; - *peak_commit = 0; *utime = 0; *stime = 0; } #endif + + +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept +{ + mi_msecs_t elapsed = 0; + mi_msecs_t utime = 0; + mi_msecs_t stime = 0; + size_t current_rss0 = 0; + size_t peak_rss0 = 0; + size_t current_commit0 = 0; + size_t peak_commit0 = 0; + size_t page_faults0 = 0; + mi_stat_process_info(&elapsed,&utime, &stime, &current_rss0, &peak_rss0, &current_commit0, &peak_commit0, &page_faults0); + if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? 
(size_t)stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = current_rss0; + if (peak_rss!=NULL) *peak_rss = peak_rss0; + if (current_commit!=NULL) *current_commit = current_commit0; + if (peak_commit!=NULL) *peak_commit = peak_commit0; + if (page_faults!=NULL) *page_faults = page_faults0; +} + diff --git a/test/main-override-static.c b/test/main-override-static.c index 9243fd21..221db7e8 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -11,38 +11,54 @@ static void double_free1(); static void double_free2(); static void corrupt_free(); static void block_overflow1(); +static void invalid_free(); +static void test_aslr(void); +static void test_process_info(void); +static void test_reserved(void); +static void negative_stat(void); int main() { mi_version(); - + mi_stats_reset(); // detect double frees and heap corruption // double_free1(); // double_free2(); // corrupt_free(); - block_overflow1(); - + // block_overflow1(); + // test_aslr(); + // invalid_free(); + // test_reserved(); + // negative_stat(); + void* p1 = malloc(78); void* p2 = malloc(24); free(p1); p1 = mi_malloc(8); - //char* s = strdup("hello\n"); + char* s = strdup("hello\n"); free(p2); + p2 = malloc(16); p1 = realloc(p1, 32); free(p1); free(p2); - //free(s); - //mi_collect(true); - + free(s); + /* now test if override worked by allocating/freeing across the APIs */ //p1 = mi_malloc(32); //free(p1); //p2 = malloc(32); //mi_free(p2); + mi_collect(true); mi_stats_print(NULL); + // test_process_info(); return 0; } +static void invalid_free() { + free((void*)0xBADBEEF); + realloc((void*)0xBADBEEF,10); +} + static void block_overflow1() { uint8_t* p = (uint8_t*)mi_malloc(17); p[18] = 0; @@ -114,3 +130,53 @@ static void corrupt_free() { malloc(SZ); } } + +static void test_aslr(void) { + void* p[256]; + p[0] = malloc(378200); + p[1] = malloc(1134626); + printf("p1: %p, p2: %p\n", p[0], p[1]); +} + +static void test_process_info(void) { + size_t elapsed = 0; + size_t user_msecs = 0; + size_t system_msecs = 0; + size_t current_rss = 0; + size_t peak_rss = 0; + size_t current_commit = 0; + size_t peak_commit = 0; + size_t page_faults = 0; + for (int i = 0; i < 100000; i++) { + void* p = calloc(100,10); + free(p); + } + mi_process_info(&elapsed, &user_msecs, &system_msecs, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); + printf("\n\n*** process info: elapsed %3zd.%03zd s, user: %3zd.%03zd s, rss: %zd b, commit: %zd b\n\n", elapsed/1000, elapsed%1000, user_msecs/1000, user_msecs%1000, peak_rss, peak_commit); +} + +static void test_reserved(void) { +#define KiB 1024ULL +#define MiB (KiB*KiB) +#define GiB (MiB*KiB) + mi_reserve_os_memory(4*GiB, false, true); + void* p1 = malloc(100); + void* p2 = malloc(100000); + void* p3 = malloc(2*GiB); + void* p4 = malloc(1*GiB + 100000); + free(p1); + free(p2); + free(p3); + p3 = malloc(1*GiB); + free(p4); +} + + + +static void negative_stat(void) { + int* p = mi_malloc(60000); + mi_stats_print_out(NULL, NULL); + *p = 100; + mi_free(p); + mi_stats_print_out(NULL, NULL); +} \ No newline at end of file diff --git a/test/main-override.cpp b/test/main-override.cpp index 8743fd0f..fe5403d1 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -14,10 +14,12 @@ #include #include +#ifdef _WIN32 #include <mimalloc-new-delete.h> +#endif #ifdef _WIN32 -#include <Windows.h> +#include <windows.h> static void msleep(unsigned long msecs) { Sleep(msecs); } #else #include <unistd.h> diff --git a/test/test-stress.c b/test/test-stress.c index 7d8993a0..c4247abe 100644 --- a/test/test-stress.c +++ 
b/test/test-stress.c @@ -20,7 +20,6 @@ terms of the MIT license. #include #include #include -#include <mimalloc.h> // > mimalloc-test-stress [THREADS] [SCALE] [ITER] // @@ -43,6 +42,7 @@ static size_t use_one_size = 0; // use single object size of `N * s #define custom_realloc(p,s) realloc(p,s) #define custom_free(p) free(p) #else +#include <mimalloc.h> #define custom_calloc(n,s) mi_calloc(n,s) #define custom_realloc(p,s) mi_realloc(p,s) #define custom_free(p) mi_free(p) @@ -123,7 +123,7 @@ static void free_items(void* p) { static void stress(intptr_t tid) { //bench_start_thread(); - uintptr_t r = (tid * 43); // rand(); + uintptr_t r = ((tid + 1) * 43); // rand(); const size_t max_item_shift = 5; // 128 const size_t max_item_retained_shift = max_item_shift + 2; size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more @@ -189,7 +189,7 @@ static void test_stress(void) { } } // mi_collect(false); -#ifndef NDEBUG +#if !defined(NDEBUG) || defined(MI_TSAN) if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); } #endif } @@ -217,7 +217,7 @@ static void test_leak(void) { } #endif -int main(int argc, char** argv) { +int main(int argc, char** argv) { // > mimalloc-test-stress [THREADS] [SCALE] [ITER] if (argc >= 2) { char* end; @@ -235,6 +235,7 @@ int main(int argc, char** argv) { if (n > 0) ITER = n; } printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); + //mi_reserve_os_memory(1024*1024*1024ULL, false, true); //int res = mi_reserve_huge_os_pages(4,1); //printf("(reserve huge: %i\n)", res); @@ -250,7 +251,9 @@ #endif // mi_collect(true); +#ifndef USE_STD_MALLOC mi_stats_print(NULL); +#endif //bench_end_program(); return 0; } @@ -260,7 +263,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress; #ifdef _WIN32 -#include <Windows.h> +#include <windows.h> static DWORD WINAPI thread_entry(LPVOID param) { thread_entry_fun((intptr_t)param); @@ -272,7 +275,7 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD)); HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE)); for (uintptr_t i = 0; i < nthreads; i++) { - thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); + thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]); } for (size_t i = 0; i < nthreads; i++) { WaitForSingleObject(thandles[i], INFINITE); @@ -305,7 +308,7 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) { pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t)); memset(threads, 0, sizeof(pthread_t) * nthreads); //pthread_setconcurrency(nthreads); - for (uintptr_t i = 0; i < nthreads; i++) { + for (size_t i = 0; i < nthreads; i++) { pthread_create(&threads[i], NULL, &thread_entry, (void*)i); } for (size_t i = 0; i < nthreads; i++) {