diff --git a/CMakeLists.txt b/CMakeLists.txt
index 467fad95..b60e64a4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.0)
 project(libmimalloc C CXX)
-include("cmake/mimalloc-config-version.cmake")
+
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
@@ -14,10 +14,14 @@ option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism" OFF)
 option(MI_BUILD_TESTS "Build test executables" ON)
 option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
 
+include("cmake/mimalloc-config-version.cmake")
+
 set(mi_sources
     src/stats.c
+    src/random.c
     src/os.c
-    src/memory.c
+    src/arena.c
+    src/region.c
     src/segment.c
     src/page.c
     src/alloc.c
@@ -50,7 +54,7 @@ endif()
 # Process options
 # -----------------------------------------------------------------------------
 
-if(CMAKE_C_COMPILER_ID MATCHES "MSVC")
+if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
   set(MI_USE_CXX "ON")
 endif()
@@ -92,27 +96,38 @@ endif()
 if(MI_USE_CXX MATCHES "ON")
   message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)")
   set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX )
-  set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX )
+  set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX )
+  if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang|Clang")
+    list(APPEND mi_cflags -Wno-deprecated)
+  endif()
+  if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+    list(APPEND mi_cflags -Kc++)
+  endif()
 endif()
 
 # Compiler flags
 if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
-  list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas)
+  list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
+  if(CMAKE_C_COMPILER_ID MATCHES "GNU")
+    list(APPEND mi_cflags -Wno-invalid-memory-model)
+  endif()
+endif()
+
+if(CMAKE_C_COMPILER_ID MATCHES "Intel")
+  list(APPEND mi_cflags -Wall -fvisibility=hidden)
+endif()
+
+if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel")
   if(MI_LOCAL_DYNAMIC_TLS MATCHES "ON")
     list(APPEND mi_cflags -ftls-model=local-dynamic)
   else()
     list(APPEND mi_cflags -ftls-model=initial-exec)
   endif()
-  if(CMAKE_C_COMPILER_ID MATCHES "GNU")
-    list(APPEND mi_cflags -Wno-invalid-memory-model)
-    list(APPEND mi_cflags -fvisibility=hidden)
-    list(APPEND mi_cflags -fbranch-target-load-optimize)
-  endif()
 endif()
 
 # extra needed libraries
 if(WIN32)
-  list(APPEND mi_libraries psapi shell32 user32)
+  list(APPEND mi_libraries psapi shell32 user32 bcrypt)
 else()
   list(APPEND mi_libraries pthread)
   find_library(LIBRT rt)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 41d67f86..6c7bad96 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -13,16 +13,31 @@ jobs:
   pool:
     vmImage: windows-2019
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+      Secure:
+        BuildType: secure
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
-      workingDirectory: 'build'
-      cmakeArgs: ..
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
   - task: MSBuild@1
     inputs:
-      solution: build/libmimalloc.sln
-  - upload: $(Build.SourcesDirectory)/build
-    artifact: windows
+      solution: $(BuildType)/libmimalloc.sln
+  - script: |
+      cd $(BuildType)
+      ctest
+    displayName: CTest
+  - upload: $(Build.SourcesDirectory)/$(BuildType)
+    artifact: mimalloc-windows-$(BuildType)
 
 - job:
   displayName: Linux
@@ -46,6 +61,11 @@ jobs:
         CXX: g++
         BuildType: secure
         cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
+      Debug++:
+        CC: gcc
+        CXX: g++
+        BuildType: debug-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
       Debug Clang:
         CC: clang
         CXX: clang++
@@ -61,32 +81,47 @@ jobs:
         CXX: clang++
         BuildType: secure-clang
         cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
-
+      Debug++ Clang:
+        CC: clang
+        CXX: clang++
+        BuildType: debug-clang-cxx
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_USE_CXX=ON
   steps:
   - task: CMake@1
     inputs:
       workingDirectory: $(BuildType)
       cmakeArgs: .. $(cmakeExtraArgs)
-
   - script: make -j$(nproc) -C $(BuildType)
     displayName: Make
   - script: make test -C $(BuildType)
-    displayName: Ctest
-
+    displayName: CTest
   - upload: $(Build.SourcesDirectory)/$(BuildType)
-    artifact: ubuntu-$(BuildType)
+    artifact: mimalloc-ubuntu-$(BuildType)
 
 - job:
   displayName: macOS
   pool:
     vmImage: macOS-10.14
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+      Secure:
+        BuildType: secure
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
-      workingDirectory: 'build'
-      cmakeArgs: ..
-  - script: make -j$(sysctl -n hw.ncpu) -C build
-  - upload: $(Build.SourcesDirectory)/build
-    artifact: macos
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
+  - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
+    displayName: Make
+  - script: make test -C $(BuildType)
+    displayName: CTest
+  - upload: $(Build.SourcesDirectory)/$(BuildType)
+    artifact: mimalloc-macos-$(BuildType)
diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake
index 9d78b5a0..5137be80 100644
--- a/cmake/mimalloc-config-version.cmake
+++ b/cmake/mimalloc-config-version.cmake
@@ -1,5 +1,5 @@
 set(mi_version_major 1)
-set(mi_version_minor 2)
+set(mi_version_minor 5)
 set(mi_version ${mi_version_major}.${mi_version_minor})
 
 set(PACKAGE_VERSION ${mi_version})
diff --git a/doc/bench-c5-18xlarge-2020-01-20-a.svg b/doc/bench-c5-18xlarge-2020-01-20-a.svg
new file mode 100644
index 00000000..0e550935
[new file: benchmark chart, AWS c5.18xlarge, 2020-01-20, part a (886 lines of SVG markup omitted)]
diff --git a/doc/bench-c5-18xlarge-2020-01-20-b.svg b/doc/bench-c5-18xlarge-2020-01-20-b.svg
new file mode 100644
index 00000000..22bfa5c2
[new file: benchmark chart, AWS c5.18xlarge, 2020-01-20, part b (1184 lines of SVG markup omitted)]
diff --git a/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg b/doc/bench-c5-18xlarge-2020-01-20-rss-a.svg
new file mode 100644
index 00000000..6b15ebe5
[new file: RSS benchmark chart, AWS c5.18xlarge, 2020-01-20, part a (756 lines of SVG markup omitted)]
diff --git a/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg b/doc/bench-c5-18xlarge-2020-01-20-rss-b.svg
new file mode 100644
index 00000000..e3eb774c
[new file: RSS benchmark chart, AWS c5.18xlarge, 2020-01-20, part b (1027 lines of SVG markup omitted)]
diff --git a/doc/bench-r5a-12xlarge-2020-01-16-a.svg b/doc/bench-r5a-12xlarge-2020-01-16-a.svg
new file mode 100644
index 00000000..b110ff47
[new file: benchmark chart, AWS r5a.12xlarge, 2020-01-16, part a (867 lines of SVG markup omitted)]
diff --git a/doc/bench-r5a-12xlarge-2020-01-16-b.svg b/doc/bench-r5a-12xlarge-2020-01-16-b.svg
new file mode 100644
index 00000000..f7a3287e
[new file: benchmark chart, AWS r5a.12xlarge, 2020-01-16, part b (1156 lines of SVG markup omitted)]
diff --git a/doc/doxyfile b/doc/doxyfile
index 11d71667..91adbeb8 100644
--- a/doc/doxyfile
+++ b/doc/doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = mi-malloc
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 1.0
+PROJECT_NUMBER         = 1.4
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -1235,7 +1235,7 @@ HTML_EXTRA_STYLESHEET  = mimalloc-doxygen.css
 # files will be copied as-is; there are no commands or markers available.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_FILES       =
+HTML_EXTRA_FILES       =
 
 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
 # will adjust the colors in the style sheet and background images according to
diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h
index 5ad5a1e6..3f24a623 100644
--- a/doc/mimalloc-doc.h
+++ b/doc/mimalloc-doc.h
@@ -26,7 +26,7 @@ without code changes, for example, on Unix you can use it as:
 
 Notable aspects of the design include:
 
-- __small and consistent__: the library is less than 3500 LOC using simple and
+- __small and consistent__: the library is less than 6k LOC using simple and
   consistent data structures. This makes it very suitable
   to integrate and adapt in other projects. For runtime systems it
   provides hooks for a monotonic _heartbeat_ and deferred freeing (for
@@ -74,6 +74,8 @@ Further information:
 - \ref typed
 - \ref analysis
 - \ref options
+- \ref posix
+- \ref cpp
 
 */
 
@@ -297,10 +299,17 @@ size_t mi_good_size(size_t size);
 void mi_collect(bool force);
 
 /// Print the main statistics.
-/// @param out Output function. Use \a NULL for outputting to \a stderr.
+/// @param out Ignored, outputs to the registered output function or stderr by default.
 ///
 /// Most detailed when using a debug build.
-void mi_stats_print(mi_output_fun* out);
+void mi_stats_print(void* out);
+
+/// Print the main statistics.
+/// @param out An output function or \a NULL for the default.
+/// @param arg Optional argument passed to \a out (if not \a NULL)
+///
+/// Most detailed when using a debug build.
+void mi_stats_print(mi_output_fun* out, void* arg);
 
 /// Reset statistics.
 void mi_stats_reset(void);
@@ -320,20 +329,23 @@ void mi_thread_init(void);
 void mi_thread_done(void);
 
 /// Print out heap statistics for this thread.
-/// @param out Output function. Use \a NULL for outputting to \a stderr.
+/// @param out An output function or \a NULL for the default.
+/// @param arg Optional argument passed to \a out (if not \a NULL)
 ///
 /// Most detailed when using a debug build.
-void mi_thread_stats_print(mi_output_fun* out);
+void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
 
 /// Type of deferred free functions.
 /// @param force If \a true all outstanding items should be freed.
 /// @param heartbeat A monotonically increasing count.
+/// @param arg Argument that was passed at registration to hold extra state.
 ///
 /// @see mi_register_deferred_free
-typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat);
+typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
 
 /// Register a deferred free function.
 /// @param deferred_free Address of a deferred free-ing function or \a NULL to unregister.
+/// @param arg Argument that will be passed on to the deferred free function.
 ///
 /// Some runtime systems use deferred free-ing, for example when using
 /// reference counting to limit the worst case free time.
@@ -346,20 +358,46 @@ typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat);
 /// to be called deterministically after some number of allocations
 /// (regardless of freeing or available free memory).
 /// At most one \a deferred_free function can be active.
-void mi_register_deferred_free(mi_deferred_free_fun* deferred_free);
+void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
 
 /// Type of output functions.
 /// @param msg Message to output.
+/// @param arg Argument that was passed at registration to hold extra state.
 ///
 /// @see mi_register_output()
-typedef void (mi_output_fun)(const char* msg);
+typedef void (mi_output_fun)(const char* msg, void* arg);
 
 /// Register an output function.
-/// @param out The output function, use `NULL` to output to stdout.
+/// @param out The output function, use `NULL` to output to stderr.
+/// @param arg Argument that will be passed on to the output function.
 ///
 /// The `out` function is called to output any information from mimalloc,
 /// like verbose or warning messages.
-void mi_register_output(mi_output_fun* out) mi_attr_noexcept;
+void mi_register_output(mi_output_fun* out, void* arg);
+
+/// Type of error callback functions.
+/// @param err Error code (see mi_register_error() for a complete list).
+/// @param arg Argument that was passed at registration to hold extra state.
+///
+/// @see mi_register_error()
+typedef void (mi_error_fun)(int err, void* arg);
+
+/// Register an error callback function.
+/// @param errfun The error function that is called on an error (use \a NULL for default)
+/// @param arg Extra argument that will be passed on to the error function.
+///
+/// The \a errfun function is called on an error in mimalloc after emitting
+/// an error message (through the output function). It is always legal to just
+/// return from the \a errfun function in which case allocation functions generally
+/// return \a NULL or ignore the condition. The default function only calls abort()
+/// when compiled in secure mode with an \a EFAULT error. The possible error
+/// codes are:
+/// * \a EAGAIN: Double free was detected (only in debug and secure mode).
+/// * \a EFAULT: Corrupted free list or meta-data was detected (only in debug and secure mode).
+/// * \a ENOMEM: Not enough memory available to satisfy the request.
+/// * \a EOVERFLOW: Too large a request, for example in mi_calloc(), the \a count and \a size parameters are too large.
+/// * \a EINVAL: Trying to free or re-allocate an invalid pointer.
+void mi_register_error(mi_error_fun* errfun, void* arg);
 
 /// Is a pointer part of our heap?
 /// @param p The pointer to check.
@@ -367,18 +405,35 @@ void mi_register_output(mi_output_fun* out) mi_attr_noexcept;
 /// This function is relatively fast.
 bool mi_is_in_heap_region(const void* p);
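For reference, a minimal sketch of how the `arg`-carrying callbacks above compose, assuming only the declarations from this header (the log-file name is illustrative):

```
#include <mimalloc.h>
#include <stdio.h>

// Matches mi_output_fun: route mimalloc messages into the FILE* given at registration.
static void log_output(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);
}

// Matches mi_error_fun: count errors in caller-owned state. Just returning is
// legal, in which case the failing allocation generally returns NULL.
static void count_error(int err, void* arg) {
  (void)err;
  ++*(int*)arg;
}

int main(void) {
  static int error_count = 0;
  FILE* log = fopen("mimalloc.log", "w");   // illustrative file name
  if (log == NULL) return 1;
  mi_register_output(log_output, log);
  mi_register_error(count_error, &error_count);

  void* p = mi_malloc(42);
  mi_free(p);

  mi_stats_print(NULL);   // statistics now go through log_output
  fclose(log);
  return (error_count == 0) ? 0 : 1;
}
```

Threading the state through `arg` is the point of the new `void*` parameters: the callbacks stay re-entrant and need no global variables.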
 
-/// Reserve \a pages of huge OS pages (1GiB) but stops after at most `max_secs` seconds.
+
+/// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes,
+/// but stops after at most `timeout_msecs` seconds.
 /// @param pages The number of 1GiB pages to reserve.
-/// @param max_secs Maximum number of seconds to try reserving.
-/// @param pages_reserved If not \a NULL, it is set to the actual number of pages that were reserved.
+/// @param numa_nodes The number of nodes to evenly divide the pages over, or 0 for using the actual number of NUMA nodes.
+/// @param timeout_msecs Maximum number of milliseconds to try reserving, or 0 for no timeout.
 /// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
 ///
 /// The reserved memory is used by mimalloc to satisfy allocations.
-/// May quit before \a max_secs are expired if it estimates it will take more than
-/// 1.5 times \a max_secs. The time limit is needed because on some operating systems
+/// May quit before \a timeout_msecs are expired if it estimates it will take more than
+/// 1.5 times \a timeout_msecs. The time limit is needed because on some operating systems
 /// it can take a long time to reserve contiguous memory if the physical memory is
 /// fragmented.
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved);
+int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
+
+/// Reserve \a pages of huge OS pages (1GiB) at a specific \a numa_node,
+/// but stops after at most `timeout_msecs` seconds.
+/// @param pages The number of 1GiB pages to reserve.
+/// @param numa_node The NUMA node where the memory is reserved (start at 0).
+/// @param timeout_msecs Maximum number of milliseconds to try reserving, or 0 for no timeout.
+/// @returns 0 if successful, \a ENOMEM if running out of memory, or \a ETIMEDOUT if timed out.
+///
+/// The reserved memory is used by mimalloc to satisfy allocations.
+/// May quit before \a timeout_msecs are expired if it estimates it will take more than
+/// 1.5 times \a timeout_msecs. The time limit is needed because on some operating systems
+/// it can take a long time to reserve contiguous memory if the physical memory is
+/// fragmented.
+int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
+
 
 /// Is the C runtime \a malloc API redirected?
 /// @returns \a true if all malloc API calls are redirected to mimalloc.
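For instance, a minimal sketch (based only on the declarations above) that pre-reserves huge pages across all NUMA nodes at startup and handles the documented error codes:

```
#include <mimalloc.h>
#include <errno.h>
#include <stdio.h>

int main(void) {
  // Reserve 4 x 1GiB huge pages, interleaved over the actual NUMA nodes
  // (numa_nodes = 0), giving up after at most 2000 milliseconds.
  int err = mi_reserve_huge_os_pages_interleave(4, 0, 2000);
  if (err == ENOMEM) {
    fprintf(stderr, "not enough memory for huge pages\n");
  } else if (err == ETIMEDOUT) {
    fprintf(stderr, "huge page reservation timed out\n");
  }
  // ... subsequent allocations are satisfied from the reserved pages ...
  return 0;
}
```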
@@ -569,7 +624,10 @@ void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, siz
 
 /// \defgroup typed Typed Macros
 ///
-/// Typed allocation macros
+/// Typed allocation macros. For example:
+/// ```
+/// int* p = mi_malloc_tp(int)
+/// ```
 ///
 /// \{
 
@@ -702,13 +760,14 @@ typedef enum mi_option_e {
   mi_option_eager_region_commit, ///< Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
   mi_option_large_os_pages, ///< Use large OS pages (2MiB in size) if possible
   mi_option_reserve_huge_os_pages, ///< The number of huge OS pages (1GiB in size) to reserve at the start of the program.
-  mi_option_segment_cache, ///< The number of segments per thread to keep cached.
-  mi_option_page_reset,    ///< Reset page memory when it becomes free.
-  mi_option_cache_reset,   ///< Reset segment memory when a segment is cached.
+  mi_option_segment_cache, ///< The number of segments per thread to keep cached.
+  mi_option_page_reset,    ///< Reset page memory after \a mi_option_reset_delay milliseconds when it becomes free.
+  mi_option_segment_reset, ///< Experimental
+  mi_option_reset_delay,   ///< Delay in milliseconds before resetting a page (100ms by default)
+  mi_option_use_numa_nodes, ///< Pretend there are at most N NUMA nodes
   mi_option_reset_decommits, ///< Experimental
   mi_option_eager_commit_delay, ///< Experimental
-  mi_option_segment_reset, ///< Experimental
-  mi_option_os_tag,        ///< OS tag to assign to mimalloc'd memory
+  mi_option_os_tag,        ///< OS tag to assign to mimalloc'd memory
   _mi_option_last
 } mi_option_t;
 
@@ -751,18 +810,51 @@
 void mi_free_size(void* p, size_t size);
 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
 void mi_free_aligned(void* p, size_t alignment);
 
-/// raise `std::bad_alloc` exception on failure.
+/// \}
+
+/// \defgroup cpp C++ wrappers
+///
+/// `mi_` prefixed implementations of various allocation functions
+/// that use C++ semantics on out-of-memory, generally calling
+/// `std::get_new_handler` and raising a `std::bad_alloc` exception on failure.
+///
+/// Note: use the `mimalloc-new-delete.h` header to override the \a new
+/// and \a delete operators globally. The wrappers here are mostly
+/// for convenience for library writers that need to interface with
+/// mimalloc from C++.
+///
+/// \{
+
+/// like mi_malloc(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure.
 void* mi_new(std::size_t n) noexcept(false);
 
-/// raise `std::bad_alloc` exception on failure.
+/// like mi_mallocn(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure.
+void* mi_new_n(size_t count, size_t size) noexcept(false);
+
+/// like mi_malloc_aligned(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure.
 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
 
-/// return `NULL` on failure.
+/// like `mi_malloc`, but when out of memory, use `std::get_new_handler` but return \a NULL on failure.
 void* mi_new_nothrow(size_t n);
 
-/// return `NULL` on failure.
+/// like `mi_malloc_aligned`, but when out of memory, use `std::get_new_handler` but return \a NULL on failure.
 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
 
+/// like mi_realloc(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure.
+void* mi_new_realloc(void* p, size_t newsize);
+
+/// like mi_reallocn(), but when out of memory, use `std::get_new_handler` and raise `std::bad_alloc` exception on failure.
+void* mi_new_reallocn(void* p, size_t newcount, size_t size);
+
+/// \a std::allocator implementation for mimalloc for use in STL containers.
+/// For example:
+/// ```
+/// std::vector<int, mi_stl_allocator<int> > vec;
+/// vec.push_back(1);
+/// vec.pop_back();
+/// ```
+template<class T> struct mi_stl_allocator { }
+
 /// \}
 
 /*! \page build Building
 
 Checkout the sources from Github:
 ```
 git clone https://github.com/microsoft/mimalloc
 ```
 
 ## Windows
 
-Open `ide/vs2017/mimalloc.sln` in Visual Studio 2017 and build.
+Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`).
 The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
 `mimalloc-override` project builds a DLL for overriding malloc
 in the entire program.
@@ -826,6 +918,7 @@ Notes:
 
 /*! \page using Using the library
 
+### Build
 The preferred usage is including `<mimalloc.h>`, linking with
 the shared- or static library, and using the `mi_malloc` API exclusively for allocation. For example,
@@ -849,6 +942,19 @@ target_link_libraries(myapp PUBLIC mimalloc-static)
 ```
 to link with the static library. See `test\CMakeLists.txt` for an example.
 
+### C++
+For best performance in C++ programs, it is also recommended to override the
+global `new` and `delete` operators. For convenience, mimalloc provides
+[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
+
+In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator`
+interface. For example:
+```
+std::vector<some_struct, mi_stl_allocator<some_struct> > vec;
+vec.push_back(some_struct());
+```
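A short sketch of the difference between the throwing and `_nothrow` wrappers above, assuming only the declarations from this header:

```
#include <mimalloc.h>
#include <new>       // std::bad_alloc
#include <cstdio>

int main() {
  try {
    void* p = mi_new(16);        // raises std::bad_alloc on failure
    p = mi_new_realloc(p, 32);   // likewise, never returns NULL
    mi_free(p);
  }
  catch (const std::bad_alloc&) {
    std::fprintf(stderr, "allocation failed\n");
  }
  // The _nothrow variants return NULL instead of throwing:
  if (void* q = mi_new_nothrow(16)) {
    mi_free(q);
  }
  return 0;
}
```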
+
+### Statistics
 You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`)
 and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version):
@@ -897,20 +1003,33 @@ See \ref overrides for more info.
 
 /*! \page environment Environment Options
 
-You can set further options either programmatically
-(using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)),
+You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)),
 or via environment variables.
 
 - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
 - `MIMALLOC_VERBOSE=1`: show verbose messages.
 - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
+- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce
+  memory fragmentation in long running (server) programs. If performance is impacted,
+  `MIMALLOC_RESET_DELAY=`_msecs_ can be set higher (100ms by default) to make the page
+  reset occur less frequently.
 - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB in size) when available; for some workloads
   this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if
   the large OS pages are enabled -- usually one needs
-  to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
+  to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
+  the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
+  can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible).
 - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly.
   On Windows, these regions show in the working set even though usually just a small part is
   committed to physical memory. This is why it is turned off by default on Windows as it looks
   not good in the task manager. However, in reality it is always better to turn it on as it
   improves performance and has no other drawbacks.
+- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at
+  startup and can give quite a performance improvement on long running workloads. Usually it is better to not use
+  `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
+  contiguous physical memory can take a long time when memory is fragmented.
+  Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting
+  `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) to delay the initial `N` segments
+  of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
+  and allocate just a little to take up space in the huge OS page area (which cannot be reset).
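The same settings can be applied programmatically before the first allocation. A hedged sketch, assuming the `mi_option_set(mi_option_t option, long value)` signature from the options group (not shown in this hunk):

```
#include <mimalloc.h>

int main(void) {
  // Equivalent to MIMALLOC_PAGE_RESET=1 and MIMALLOC_RESET_DELAY=250:
  mi_option_set(mi_option_page_reset, 1);
  mi_option_set(mi_option_reset_delay, 250);
  // Equivalent to MIMALLOC_RESERVE_HUGE_OS_PAGES=2:
  mi_option_set(mi_option_reserve_huge_os_pages, 2);
  // ... allocate as usual ...
  return 0;
}
```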
 
 [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
@@ -960,25 +1079,28 @@ Note: unfortunately, at this time, dynamic overriding on macOS seems broken but
 
 ### Windows
 
-On Windows you need to link your program explicitly with the mimalloc
-DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
+Overriding on Windows is robust but requires that you link your program explicitly with
+the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
 Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available
-in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL).
-The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc.
+in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
+The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
+mimalloc (in `mimalloc-override.dll`).
 
 To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
 call to the mimalloc API in the `main` function, like `mi_version()`
 (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
-for an example on how to use this.
+for an example on how to use this. For best performance on Windows with C++, it
+is highly recommended to also override the `new`/`delete` operations (by including
+[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) in a single(!) source file in your project).
 
 The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
-overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc successfully redirected.
+overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
 
-(Note: in principle, it should be possible to patch existing executables
-that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the mimalloc DLL into
-the import table (and putting `mimalloc-redirect.dll` in the same folder)
+(Note: in principle, it is possible to patch existing executables
+that are linked with the dynamic C runtime (`ucrtbase.dll`) by just putting the `mimalloc-override.dll`
+into the import table (and putting `mimalloc-redirect.dll` in the same folder)
 Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
+
 
 ## Static override
 
 On Unix systems, you can also statically link with _mimalloc_ to override the standard
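Returning to the Windows override above: a minimal sketch of the `mi_version()` anchor call (the comment reflects the loading behaviour described in the text):

```
#include <mimalloc.h>
#include <cstdio>

int main() {
  // Referencing the mimalloc API keeps the linker from dropping the DLL
  // import, so mimalloc-override.dll (and its mimalloc-redirect.dll
  // dependency) is loaded at startup.
  std::printf("mimalloc version: %d\n", mi_version());
  // ... the rest of the program now runs with redirected malloc/new ...
  return 0;
}
```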
diff --git a/docs/annotated.html b/docs/annotated.html
index dcc2e74d..5120b803 100644
--- a/docs/annotated.html
+++ b/docs/annotated.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4; the data-structure index now lists mi_stl_allocator ("std::allocator implementation for mimalloc for use in STL containers") alongside mi_heap_area_t]
diff --git a/docs/annotated_dup.js b/docs/annotated_dup.js
index 6ed68bc3..67229123 100644
--- a/docs/annotated_dup.js
+++ b/docs/annotated_dup.js
@@ -1,4 +1,5 @@
 var annotated_dup =
 [
-    [ "mi_heap_area_t", "group__analysis.html#structmi__heap__area__t", "group__analysis_structmi__heap__area__t" ]
+    [ "mi_heap_area_t", "group__analysis.html#structmi__heap__area__t", "group__analysis_structmi__heap__area__t" ],
+    [ "mi_stl_allocator", "group__cpp.html#structmi__stl__allocator", null ]
 ];
\ No newline at end of file
diff --git a/docs/bench.html b/docs/bench.html
index 11b18550..6b289c04 100644
--- a/docs/bench.html
+++ b/docs/bench.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4]
diff --git a/docs/build.html b/docs/build.html
index 3e870697..755aad88 100644
--- a/docs/build.html
+++ b/docs/build.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4; the Windows build instructions now read "Open ide/vs2019/mimalloc.sln in Visual Studio 2019 and build (or ide/vs2017/mimalloc.sln)", matching the \page build source above]
diff --git a/docs/classes.html b/docs/classes.html
index 760b28de..de960fb6 100644
--- a/docs/classes.html
+++ b/docs/classes.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4; the class index now includes mi_stl_allocator]
diff --git a/docs/environment.html b/docs/environment.html
new file mode 100644
index 00000000..1063654e
--- /dev/null
+++ b/docs/environment.html
[new Doxygen page, 127 lines (HTML markup omitted): "Environment Options", rendering the \page environment section above -- MIMALLOC_SHOW_STATS, MIMALLOC_VERBOSE, MIMALLOC_SHOW_ERRORS, MIMALLOC_PAGE_RESET/MIMALLOC_RESET_DELAY, MIMALLOC_LARGE_OS_PAGES, MIMALLOC_EAGER_REGION_COMMIT, MIMALLOC_RESERVE_HUGE_OS_PAGES, and MIMALLOC_EAGER_COMMIT_DELAY]
diff --git a/docs/functions.html b/docs/functions.html
index d2615a17..43e116ed 100644
--- a/docs/functions.html
+++ b/docs/functions.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4]
diff --git a/docs/functions_vars.html b/docs/functions_vars.html
index b824832f..060a18d2 100644
--- a/docs/functions_vars.html
+++ b/docs/functions_vars.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4]
diff --git a/docs/group__aligned.html b/docs/group__aligned.html
index 4980b45a..88c10eb4 100644
--- a/docs/group__aligned.html
+++ b/docs/group__aligned.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4]
diff --git a/docs/group__analysis.html b/docs/group__analysis.html
index 3301fdef..b8d644aa 100644
--- a/docs/group__analysis.html
+++ b/docs/group__analysis.html
[regenerated Doxygen page (HTML markup omitted): version banner 1.0 -> 1.4]
+
+ + + + + + + + +
+
mi-malloc +  1.4 +
+
+ + + + + + +
+
+
+ + + +
+
+ +
+
+
+ +
+ +
+
+ + +
+ +
+ +
+ +
+
C++ wrappers
+
+
+ +

mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure. +More...

+ + + + + +

+Data Structures

struct  mi_stl_allocator< T >
 std::allocator implementation for mimalloc for use in STL containers. More...
 
+ + + + + + + + + + + + + + + + + + + + + + +

+Functions

void * mi_new (std::size_t n) noexcept(false)
 like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_n (size_t count, size_t size) noexcept(false)
 like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false)
 like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_nothrow (size_t n)
 like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure. More...
 
void * mi_new_aligned_nothrow (size_t n, size_t alignment)
 like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure. More...
 
void * mi_new_realloc (void *p, size_t newsize)
 like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
void * mi_new_reallocn (void *p, size_t newcount, size_t size)
 like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure. More...
 
+

Detailed Description

+

mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure.

+

Note: use the mimalloc-new-delete.h header to override the new and delete operators globally. The wrappers here are mostly for convience for library writers that need to interface with mimalloc from C++.

+

Data Structure Documentation

+ +

◆ mi_stl_allocator

+ +
+
+ + + + +
struct mi_stl_allocator
+
+

template<class T>
+struct mi_stl_allocator< T >

+ +

std::allocator implementation for mimalloc for use in STL containers.

+

For example:

std::vector<int, mi_stl_allocator<int> > vec;
vec.push_back(1);
vec.pop_back();
+
+
+

Function Documentation

+ +

◆ mi_new()

+ +
+
+ + + + + +
+ + + + + + + + +
void* mi_new (std::size_t n)
+
+noexcept
+
+ +

like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_aligned()

+ +
+
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
void* mi_new_aligned (std::size_t n,
std::align_val_t alignment 
)
+
+noexcept
+
+ +

like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_aligned_nothrow()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void* mi_new_aligned_nothrow (size_t n,
size_t alignment 
)
+
+ +

like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure.

+ +
+
+ +

◆ mi_new_n()

+ +
+
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
void* mi_new_n (size_t count,
size_t size 
)
+
+noexcept
+
+ +

like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_nothrow()

+ +
+
+ + + + + + + + +
void* mi_new_nothrow (size_t n)
+
+ +

like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.

+ +
+
+ +

◆ mi_new_realloc()

+ +
+
+ + + + + + + + + + + + + + + + + + +
void* mi_new_realloc (void * p,
size_t newsize 
)
+
+ +

like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+ +

◆ mi_new_reallocn()

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
void* mi_new_reallocn (void * p,
size_t newcount,
size_t size 
)
+
+ +

like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception on failure.

+ +
+
+
+
+ + + + diff --git a/docs/group__cpp.js b/docs/group__cpp.js new file mode 100644 index 00000000..20706646 --- /dev/null +++ b/docs/group__cpp.js @@ -0,0 +1,11 @@ +var group__cpp = +[ + [ "mi_stl_allocator", "group__cpp.html#structmi__stl__allocator", null ], + [ "mi_new", "group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545", null ], + [ "mi_new_aligned", "group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ], + [ "mi_new_aligned_nothrow", "group__cpp.html#gab5e29558926d934c3f1cae8c815f942c", null ], + [ "mi_new_n", "group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81", null ], + [ "mi_new_nothrow", "group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ], + [ "mi_new_realloc", "group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e", null ], + [ "mi_new_reallocn", "group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907", null ] +]; \ No newline at end of file diff --git a/docs/group__extended.html b/docs/group__extended.html index 4d07f38d..9e2a2efc 100644 --- a/docs/group__extended.html +++ b/docs/group__extended.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -118,12 +118,15 @@ Macros - - - - - - + + + + + + + + +

Typedefs

typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)
 Type of deferred free functions. More...
 
typedef void() mi_output_fun(const char *msg)
 Type of output functions. More...
 
typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
 Type of deferred free functions. More...
 
typedef void() mi_output_fun(const char *msg, void *arg)
 Type of output functions. More...
 
typedef void() mi_error_fun(int err, void *arg)
 Type of error callback functions. More...
 
@@ -142,9 +145,12 @@ Functions - - - + + + + + + @@ -157,21 +163,27 @@ Functions - - - - - - - - - + + + + + + + + + + + + - - - + + + + + + @@ -196,14 +208,14 @@ Functions

Typedef Documentation

- -

◆ mi_deferred_free_fun

+ +

◆ mi_deferred_free_fun

Functions

void mi_collect (bool force)
 Eagerly free memory. More...
 
void mi_stats_print (mi_output_fun *out)
 Print the main statistics. More...
 
void mi_stats_print (void *out)
 Print the main statistics. More...
 
void mi_stats_print (mi_output_fun *out, void *arg)
 Print the main statistics. More...
 
void mi_stats_reset (void)
 Reset statistics. More...
 
void mi_thread_done (void)
 Uninitialize mimalloc on a thread. More...
 
void mi_thread_stats_print (mi_output_fun *out)
 Print out heap statistics for this thread. More...
 
void mi_register_deferred_free (mi_deferred_free_fun *deferred_free)
 Register a deferred free function. More...
 
void mi_register_output (mi_output_fun *out) mi_attr_noexcept
 Register an output function. More...
 
void mi_thread_stats_print_out (mi_output_fun *out, void *arg)
 Print out heap statistics for this thread. More...
 
void mi_register_deferred_free (mi_deferred_free_fun *deferred_free, void *arg)
 Register a deferred free function. More...
 
void mi_register_output (mi_output_fun *out, void *arg)
 Register an output function. More...
 
void mi_register_error (mi_error_fun *errfun, void *arg)
 Register an error callback function. More...
 
bool mi_is_in_heap_region (const void *p)
 Is a pointer part of our heap? More...
 
int mi_reserve_huge_os_pages (size_t pages, double max_secs, size_t *pages_reserved)
 Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds. More...
 
int mi_reserve_huge_os_pages_interleave (size_t pages, size_t numa_nodes, size_t timeout_msecs)
 Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most timeout_msecs seconds. More...
 
int mi_reserve_huge_os_pages_at (size_t pages, int numa_node, size_t timeout_msecs)
 Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs seconds. More...
 
bool mi_is_redirected ()
 Is the C runtime malloc API redirected? More...
 
- +
typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)typedef void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
@@ -212,22 +224,47 @@ Functions
Parameters
- + +
forceIf true all outstanding items should be freed.
heartbeatA monotonically increasing count.
heartbeatA monotonically increasing count.
argArgument that was passed at registration to hold extra state.
-
See also
mi_register_deferred_free
+
See also
mi_register_deferred_free
- -

◆ mi_output_fun

+ +

◆ mi_error_fun

- + + +
typedef void() mi_output_fun(const char *msg)typedef void() mi_error_fun(int err, void *arg)
+
+ +

Type of error callback functions.

+
Parameters
+ + + +
errError code (see mi_register_error() for a complete list).
argArgument that was passed at registration to hold extra state.
+
+
+
See also
mi_register_error()
+ +
+
+ +

◆ mi_output_fun

+ +
+
+ + +
typedef void() mi_output_fun(const char *msg, void *arg)
@@ -235,11 +272,12 @@ Functions

Type of output functions.

Parameters
- + +
msgMessage to output.
msgMessage to output.
argArgument that was passed at registration to hold extra state.
-
See also
mi_register_output()
+
See also
mi_register_output()
@@ -375,8 +413,8 @@ Functions
@@ -384,74 +422,14 @@ Functions
@@ -461,17 +439,189 @@ Functions

◆ mi_register_output()

void mi_register_output (mi_output_fun * out)

Register an output function.

Parameters
out  The output function, use NULL to output to stdout.

The out function is called to output any information from mimalloc, like verbose or warning messages.

◆ mi_reserve_huge_os_pages()

int mi_reserve_huge_os_pages (size_t pages,
double max_secs,
size_t * pages_reserved )

Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds.

Parameters
pages  The number of 1GiB pages to reserve.
max_secs  Maximum number of seconds to try reserving.
pages_reserved  If not NULL, it is set to the actual number of pages that were reserved.

The reserved memory is used by mimalloc to satisfy allocations. May quit before max_secs have expired if it estimates it will take more than 1.5 times max_secs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

◆ mi_register_deferred_free()

-void mi_register_deferred_free (mi_deferred_free_fun * deferred_free)
+void mi_register_deferred_free (mi_deferred_free_fun * deferred_free,
+void * arg )

Register a deferred free function.

Parameters
deferred_free  Address of a deferred free-ing function or NULL to unregister.
arg  Argument that will be passed on to the deferred free function.

Some runtime systems use deferred free-ing, for example when using reference counting to limit the worst case free time. Such systems can register a (re-entrant) deferred free function to free more memory on demand. When the force parameter is true all possible memory should be freed. The per-thread heartbeat parameter is monotonically increasing and guaranteed to be deterministic if the program allocates deterministically. The deferred_free function is guaranteed to be called deterministically after some number of allocations (regardless of freeing or available free memory). At most one deferred_free function can be active.
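For example, a minimal sketch of such a registration; the pending list and helper names here are illustrative inventions, not part of the mimalloc API:

#include <mimalloc.h>
#include <stdbool.h>
#include <stddef.h>

/* Hypothetical queue of objects whose reference count already reached zero. */
typedef struct pending_s { struct pending_s* next; } pending_t;
static pending_t* pending_head = NULL;

/* Called by mimalloc after some number of allocations; may be re-entrant. */
static void runtime_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
  (void)heartbeat; (void)arg;
  size_t batch = force ? (size_t)-1 : 16;   /* free everything when forced */
  while (pending_head != NULL && batch-- > 0) {
    pending_t* dead = pending_head;
    pending_head = dead->next;
    mi_free(dead);
  }
}

int main(void) {
  mi_register_deferred_free(&runtime_deferred_free, NULL);
  /* ... allocate; mimalloc calls the hook deterministically ... */
  return 0;
}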

◆ mi_register_error()

void mi_register_error (mi_error_fun * errfun,
void * arg 
)

Register an error callback function.

Parameters
errfun  The error function that is called on an error (use NULL for default)
arg  Extra argument that will be passed on to the error function.

The errfun function is called on an error in mimalloc after emitting an error message (through the output function). It is always legal to just return from the errfun function, in which case allocation functions generally return NULL or ignore the condition. The default function only calls abort() when compiled in secure mode with an EFAULT error. The possible error codes are:

• EAGAIN: Double free was detected (only in debug and secure mode).
• EFAULT: Corrupted free list or meta-data was detected (only in debug and secure mode).
• ENOMEM: Not enough memory available to satisfy the request.
• EOVERFLOW: Too large a request, for example in mi_calloc(), the count and size parameters are too large.
• EINVAL: Trying to free or re-allocate an invalid pointer.
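As a sketch (the choice of log destination and fatal codes is ours, not mandated by mimalloc):

#include <mimalloc.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Log each error; treat corruption (EFAULT) and double free (EAGAIN) as fatal. */
static void on_mi_error(int err, void* arg) {
  FILE* log = (FILE*)arg;               /* extra state passed at registration */
  fprintf(log, "mimalloc error: %d\n", err);
  if (err == EFAULT || err == EAGAIN) abort();
  /* otherwise return: the failing allocation simply returns NULL */
}

int main(void) {
  mi_register_error(&on_mi_error, stderr);
  return 0;
}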

◆ mi_register_output()

void mi_register_output (mi_output_fun * out,
void * arg 
)

Register an output function.

Parameters
out  The output function, use NULL to output to stderr.
arg  Argument that will be passed on to the output function.

The out function is called to output any information from mimalloc, like verbose or warning messages.

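A small sketch that redirects all mimalloc messages to a log file (the file name is illustrative):

#include <mimalloc.h>
#include <stdio.h>

static void to_log(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);               /* arg is the FILE* from registration */
}

int main(void) {
  FILE* log = fopen("mimalloc.log", "w");
  if (log != NULL) mi_register_output(&to_log, log);
  /* verbose and warning messages now go to mimalloc.log */
  return 0;
}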

◆ mi_reserve_huge_os_pages_at()

int mi_reserve_huge_os_pages_at (size_t pages,
int numa_node,
size_t timeout_msecs 
)
Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs milliseconds.

Parameters
pages  The number of 1GiB pages to reserve.
numa_node  The NUMA node where the memory is reserved (start at 0).
timeout_msecs  Maximum number of milliseconds to try reserving, or 0 for no timeout.

Returns
0 if successful, ENOMEM if running out of memory, or ETIMEDOUT if timed out.

The reserved memory is used by mimalloc to satisfy allocations. May quit before timeout_msecs have expired if it estimates it will take more than 1.5 times timeout_msecs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

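For instance, a sketch that reserves 8 huge pages on NUMA node 0 with a 2 second budget (the counts are arbitrary):

#include <mimalloc.h>
#include <errno.h>
#include <stdio.h>

int main(void) {
  int err = mi_reserve_huge_os_pages_at(8, 0, 2000);  /* pages, numa_node, timeout_msecs */
  if (err == ENOMEM)         fprintf(stderr, "not enough memory for huge pages\n");
  else if (err == ETIMEDOUT) fprintf(stderr, "huge page reservation timed out\n");
  return err;
}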

◆ mi_reserve_huge_os_pages_interleave()

int mi_reserve_huge_os_pages_interleave (size_t pages,
size_t numa_nodes,
size_t timeout_msecs 
)
Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most timeout_msecs milliseconds.

Parameters
pages  The number of 1GiB pages to reserve.
numa_nodes  The number of nodes to evenly divide the pages over, or 0 to use the actual number of NUMA nodes.
timeout_msecs  Maximum number of milliseconds to try reserving, or 0 for no timeout.

Returns
0 if successful, ENOMEM if running out of memory, or ETIMEDOUT if timed out.
The reserved memory is used by mimalloc to satisfy allocations. May quit before timeout_msecs have expired if it estimates it will take more than 1.5 times timeout_msecs. The time limit is needed because on some operating systems it can take a long time to reserve contiguous memory if the physical memory is fragmented.

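And a sketch that spreads the reservation over all detected NUMA nodes; 0 for numa_nodes uses the actual node count and 0 for timeout_msecs disables the time limit. The same reservation can also be requested at program start through the mi_option_reserve_huge_os_pages option:

#include <mimalloc.h>

int main(void) {
  int err = mi_reserve_huge_os_pages_interleave(16, 0, 0);
  (void)err;   /* 0 on success, else ENOMEM or ETIMEDOUT as documented above */
  return 0;
}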
@@ -495,8 +645,8 @@ Functions

◆ mi_stats_print() [1/2]

@@ -504,7 +654,7 @@ Functions
-void mi_stats_print (mi_output_fun * out)
+void mi_stats_print (void * out)
@@ -514,7 +664,45 @@ Functions

Print the main statistics.

Parameters
-out  Output function. Use NULL for outputting to stderr.
+out  Ignored, outputs to the registered output function or stderr by default.

Most detailed when using a debug build.


◆ mi_stats_print() [2/2]

void mi_stats_print (mi_output_fun * out,
void * arg 
)

Print the main statistics.

Parameters
out  An output function or NULL for the default.
arg  Optional argument passed to out (if not NULL)
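As a sketch against the two-argument overload documented on this page (the sink and file name are our own choices):

#include <mimalloc.h>
#include <stdio.h>

static void stats_sink(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);
}

int main(void) {
  void* p = mi_malloc(100);
  mi_free(p);
  FILE* f = fopen("mimalloc-stats.txt", "w");
  if (f != NULL) {
    mi_stats_print(&stats_sink, f);     /* overload [2/2] above */
    fclose(f);
  }
  return 0;
}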
@@ -584,18 +772,28 @@ Functions

◆ mi_thread_stats_print_out()

-void mi_thread_stats_print (mi_output_fun * out)
+void mi_thread_stats_print_out (mi_output_fun * out,
+void * arg )
@@ -603,7 +801,8 @@ Functions

Print out heap statistics for this thread.

Parameters
-out  Output function. Use NULL for outputting to stderr.
+out  An output function or NULL for the default.
+arg  Optional argument passed to out (if not NULL)
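A short sketch; NULL selects the default output, otherwise out and arg behave as for mi_stats_print above:

#include <mimalloc.h>
#include <stddef.h>

int main(void) {
  void* p = mi_malloc(64);
  mi_free(p);
  mi_thread_stats_print_out(NULL, NULL);   /* statistics for this thread */
  return 0;
}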
diff --git a/docs/group__extended.js b/docs/group__extended.js
index 00c73614..ff8891b2 100644
--- a/docs/group__extended.js
+++ b/docs/group__extended.js
@@ -1,22 +1,26 @@
 var group__extended =
 [
   [ "MI_SMALL_SIZE_MAX", "group__extended.html#ga1ea64283508718d9d645c38efc2f4305", null ],
-  [ "mi_deferred_free_fun", "group__extended.html#ga22213691c3ce5ab4d91b24aff1023529", null ],
-  [ "mi_output_fun", "group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f", null ],
+  [ "mi_deferred_free_fun", "group__extended.html#ga299dae78d25ce112e384a98b7309c5be", null ],
+  [ "mi_error_fun", "group__extended.html#ga251d369cda3f1c2a955c555486ed90e5", null ],
+  [ "mi_output_fun", "group__extended.html#gad823d23444a4b77a40f66bf075a98a0c", null ],
   [ "mi_collect", "group__extended.html#ga421430e2226d7d468529cec457396756", null ],
   [ "mi_good_size", "group__extended.html#gac057927cd06c854b45fe7847e921bd47", null ],
   [ "mi_is_in_heap_region", "group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6", null ],
   [ "mi_is_redirected", "group__extended.html#gaad25050b19f30cd79397b227e0157a3f", null ],
   [ "mi_malloc_small", "group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99", null ],
-  [ "mi_register_deferred_free", "group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2", null ],
-  [ "mi_register_output", "group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5", null ],
-  [ "mi_reserve_huge_os_pages", "group__extended.html#ga2664f36a2dd557741c429cb799f04641", null ],
+  [ "mi_register_deferred_free", "group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece", null ],
+  [ "mi_register_error", "group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45", null ],
+  [ "mi_register_output", "group__extended.html#gae5b17ff027cd2150b43a33040250cf3f", null ],
+  [ "mi_reserve_huge_os_pages_at", "group__extended.html#ga7795a13d20087447281858d2c771cca1", null ],
+  [ "mi_reserve_huge_os_pages_interleave", "group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50", null ],
   [ "mi_stats_merge", "group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1", null ],
-  [ "mi_stats_print", "group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01", null ],
+  [ "mi_stats_print", "group__extended.html#ga2d126e5c62d3badc35445e5d84166df2", null ],
+  [ "mi_stats_print", "group__extended.html#ga256cc6f13a142deabbadd954a217e228", null ],
   [ "mi_stats_reset", "group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99", null ],
   [ "mi_thread_done", "group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf", null ],
   [ "mi_thread_init", "group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17", null ],
-  [ "mi_thread_stats_print", "group__extended.html#ga489670a15d1a257ab4639e645ee4612a", null ],
+  [ "mi_thread_stats_print_out", "group__extended.html#gab1dac8476c46cb9eecab767eb40c1525", null ],
   [ "mi_usable_size", "group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee", null ],
   [ "mi_zalloc_small", "group__extended.html#ga220f29f40a44404b0061c15bc1c31152", null ]
 ];
\ No newline at end of file
diff --git a/docs/group__heap.html b/docs/group__heap.html
index 753aaba3..0973279a 100644
--- a/docs/group__heap.html
+++ b/docs/group__heap.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
diff --git a/docs/group__malloc.html b/docs/group__malloc.html
index 6bd71d06..bee7b4eb 100644
--- a/docs/group__malloc.html
+++ b/docs/group__malloc.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
diff --git a/docs/group__options.html b/docs/group__options.html
index a34a9307..71c7ba24 100644
--- a/docs/group__options.html
+++ b/docs/group__options.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
@@ -123,11 +123,12 @@ Enumerations
 mi_option_segment_cache,
 mi_option_page_reset,
-mi_option_cache_reset,
-mi_option_reset_decommits,
-mi_option_eager_commit_delay,
+mi_option_segment_reset,
+mi_option_reset_delay,
+mi_option_use_numa_nodes,
-mi_option_segment_reset,
+mi_option_reset_decommits,
+mi_option_eager_commit_delay,
 mi_option_os_tag,
 _mi_option_last
@@ -183,16 +184,18 @@ Functions
mi_option_segment_cache

The number of segments per thread to keep cached.

-mi_option_page_reset 

Reset page memory when it becomes free.

+mi_option_page_reset 

Reset page memory after mi_option_reset_delay milliseconds when it becomes free.

-mi_option_cache_reset 

Reset segment memory when a segment is cached.

+mi_option_segment_reset 

Experimental.

+mi_option_reset_delay

Delay in milli-seconds before resetting a page (100ms by default)

+mi_option_use_numa_nodes

Pretend there are at most N NUMA nodes.

mi_option_reset_decommits 

Experimental.

mi_option_eager_commit_delay 

Experimental.

-mi_option_segment_reset 

Experimental.

mi_option_os_tag

OS tag to assign to mimalloc'd memory.

_mi_option_last
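As a sketch of tuning these options from code, using the option setters declared in the header on this page (the values are arbitrary examples):

#include <mimalloc.h>
#include <stdbool.h>

int main(void) {
  mi_option_set(mi_option_reset_delay, 250);      /* reset pages after 250ms instead of 100ms */
  mi_option_set(mi_option_use_numa_nodes, 2);     /* pretend there are at most 2 NUMA nodes */
  mi_option_enable(mi_option_page_reset, true);   /* reset page memory when it becomes free */
  return 0;
}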
diff --git a/docs/group__options.js b/docs/group__options.js
index 4bf52d54..1d84ea8b 100644
--- a/docs/group__options.js
+++ b/docs/group__options.js
@@ -10,10 +10,11 @@ var group__options =
   [ "mi_option_reserve_huge_os_pages", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2", null ],
   [ "mi_option_segment_cache", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1", null ],
   [ "mi_option_page_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968", null ],
-  [ "mi_option_cache_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07", null ],
+  [ "mi_option_segment_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d", null ],
+  [ "mi_option_reset_delay", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5", null ],
+  [ "mi_option_use_numa_nodes", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74", null ],
   [ "mi_option_reset_decommits", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536", null ],
   [ "mi_option_eager_commit_delay", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c", null ],
-  [ "mi_option_segment_reset", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d", null ],
   [ "mi_option_os_tag", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf", null ],
   [ "_mi_option_last", "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a", null ]
 ] ],
diff --git a/docs/group__posix.html b/docs/group__posix.html
index b9cf0b52..1aea8dc8 100644
--- a/docs/group__posix.html
+++ b/docs/group__posix.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
@@ -137,18 +137,6 @@ Functions
 void mi_free_aligned (void *p, size_t alignment)
-void * mi_new (std::size_t n) noexcept(false)
- raise std::bad_alloc exception on failure. More...
-void * mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept(false)
- raise std::bad_alloc exception on failure. More...
-void * mi_new_nothrow (size_t n)
- return NULL on failure. More...
-void * mi_new_aligned_nothrow (size_t n, size_t alignment)
- return NULL on failure. More...

Detailed Description

mi_ prefixed implementations of various Posix, Unix, and C++ allocation functions.

@@ -388,122 +376,6 @@ Functions

◆ mi_new()

void* mi_new (std::size_t n) noexcept

raise std::bad_alloc exception on failure.


◆ mi_new_aligned()

void* mi_new_aligned (std::size_t n, std::align_val_t alignment) noexcept

raise std::bad_alloc exception on failure.


◆ mi_new_aligned_nothrow()

void* mi_new_aligned_nothrow (size_t n, size_t alignment)

return NULL on failure.


◆ mi_new_nothrow()

void* mi_new_nothrow (size_t n)

return NULL on failure.

diff --git a/docs/group__posix.js b/docs/group__posix.js
index 5584092b..e43453d9 100644
--- a/docs/group__posix.js
+++ b/docs/group__posix.js
@@ -9,10 +9,6 @@ var group__posix =
   [ "mi_malloc_size", "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de", null ],
   [ "mi_malloc_usable_size", "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17", null ],
   [ "mi_memalign", "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e", null ],
-  [ "mi_new", "group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545", null ],
-  [ "mi_new_aligned", "group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3", null ],
-  [ "mi_new_aligned_nothrow", "group__posix.html#gab5e29558926d934c3f1cae8c815f942c", null ],
-  [ "mi_new_nothrow", "group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a", null ],
   [ "mi_posix_memalign", "group__posix.html#gacff84f226ba9feb2031b8992e5579447", null ],
   [ "mi_pvalloc", "group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e", null ],
   [ "mi_reallocarray", "group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088", null ],
diff --git a/docs/group__typed.html b/docs/group__typed.html
index 8ea0f095..cf5ac5d1 100644
--- a/docs/group__typed.html
+++ b/docs/group__typed.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
diff --git a/docs/group__zeroinit.html b/docs/group__zeroinit.html
new file mode 100644
index 00000000..28983138
--- /dev/null
+++ b/docs/group__zeroinit.html
@@ -0,0 +1,597 @@
+mi-malloc: Zero initialized re-allocation
Zero initialized re-allocation
The zero-initialized re-allocations are only valid on memory that was originally allocated with zero initialization too. More...

Functions

void * mi_rezalloc (void *p, size_t newsize)
 
void * mi_rezalloc_aligned (void *p, size_t newsize, size_t alignment)
 
void * mi_rezalloc_aligned_at (void *p, size_t newsize, size_t alignment, size_t offset)
 
void * mi_recalloc_aligned (void *p, size_t newcount, size_t size, size_t alignment)
 
void * mi_recalloc_aligned_at (void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
 
void * mi_heap_rezalloc (mi_heap_t *heap, void *p, size_t newsize)
 
void * mi_heap_recalloc (mi_heap_t *heap, void *p, size_t newcount, size_t size)
 
void * mi_heap_rezalloc_aligned (mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
 
void * mi_heap_rezalloc_aligned_at (mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
 
void * mi_heap_recalloc_aligned (mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
 
void * mi_heap_recalloc_aligned_at (mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
 
Detailed Description

The zero-initialized re-allocations are only valid on memory that was originally allocated with zero initialization too, e.g. mi_calloc, mi_zalloc, mi_zalloc_aligned etc.; see https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992
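For example, a sketch of a valid use: grow a block that started out zero initialized and rely on the new tail being zero:

#include <mimalloc.h>
#include <string.h>

int main(void) {
  char* buf = (char*)mi_zalloc(128);    /* zero-initialized origin: rezalloc is valid */
  if (buf == NULL) return 1;
  memset(buf, 'x', 64);
  buf = (char*)mi_rezalloc(buf, 256);   /* bytes [128,256) are guaranteed zero */
  if (buf == NULL) return 1;
  int ok = (buf[200] == 0);
  mi_free(buf);
  /* By contrast, calling mi_rezalloc on plain mi_malloc'd memory would
     violate the rule above, since that memory was never zero initialized. */
  return ok ? 0 : 1;
}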

Function Documentation


◆ mi_heap_recalloc()

void* mi_heap_recalloc (mi_heap_t * heap, void * p, size_t newcount, size_t size)

◆ mi_heap_recalloc_aligned()

void* mi_heap_recalloc_aligned (mi_heap_t * heap, void * p, size_t newcount, size_t size, size_t alignment)

◆ mi_heap_recalloc_aligned_at()

void* mi_heap_recalloc_aligned_at (mi_heap_t * heap, void * p, size_t newcount, size_t size, size_t alignment, size_t offset)

◆ mi_heap_rezalloc()

void* mi_heap_rezalloc (mi_heap_t * heap, void * p, size_t newsize)

◆ mi_heap_rezalloc_aligned()

void* mi_heap_rezalloc_aligned (mi_heap_t * heap, void * p, size_t newsize, size_t alignment)

◆ mi_heap_rezalloc_aligned_at()

void* mi_heap_rezalloc_aligned_at (mi_heap_t * heap, void * p, size_t newsize, size_t alignment, size_t offset)

◆ mi_recalloc_aligned()

void* mi_recalloc_aligned (void * p, size_t newcount, size_t size, size_t alignment)

◆ mi_recalloc_aligned_at()

void* mi_recalloc_aligned_at (void * p, size_t newcount, size_t size, size_t alignment, size_t offset)

◆ mi_rezalloc()

void* mi_rezalloc (void * p, size_t newsize)

◆ mi_rezalloc_aligned()

void* mi_rezalloc_aligned (void * p, size_t newsize, size_t alignment)

◆ mi_rezalloc_aligned_at()

void* mi_rezalloc_aligned_at (void * p, size_t newsize, size_t alignment, size_t offset)
diff --git a/docs/group__zeroinit.js b/docs/group__zeroinit.js
new file mode 100644
index 00000000..b9297d21
--- /dev/null
+++ b/docs/group__zeroinit.js
@@ -0,0 +1,14 @@
+var group__zeroinit =
+[
+  [ "mi_heap_recalloc", "group__zeroinit.html#ga8648c5fbb22a80f0262859099f06dfbd", null ],
+  [ "mi_heap_recalloc_aligned", "group__zeroinit.html#ga9f3f999396c8f77ca5e80e7b40ac29e3", null ],
+  [ "mi_heap_recalloc_aligned_at", "group__zeroinit.html#ga496452c96f1de8c500be9fddf52edaf7", null ],
+  [ "mi_heap_rezalloc", "group__zeroinit.html#gacfad83f14eb5d6a42a497a898e19fc76", null ],
+  [ "mi_heap_rezalloc_aligned", "group__zeroinit.html#ga375fa8a611c51905e592d5d467c49664", null ],
+  [ "mi_heap_rezalloc_aligned_at", "group__zeroinit.html#gac90da54fa7e5d10bdc97ce0b51dce2eb", null ],
+  [ "mi_recalloc_aligned", "group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f", null ],
+  [ "mi_recalloc_aligned_at", "group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9", null ],
+  [ "mi_rezalloc", "group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6", null ],
+  [ "mi_rezalloc_aligned", "group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0", null ],
+  [ "mi_rezalloc_aligned_at", "group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1", null ]
+];
\ No newline at end of file
diff --git a/docs/index.html b/docs/index.html
index bf758c3c..0efc9c09 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
@@ -105,7 +105,7 @@ $(document).ready(function(){initNavTree('index.html','');});

This is the API documentation of the mimalloc allocator (pronounced "me-malloc") – a general purpose allocator with excellent performance characteristics. Initially developed by Daan Leijen for the run-time systems of the Koka and Lean languages.

It is a drop-in replacement for malloc and can be used in other programs without code changes, for example, on Unix you can use it as:

> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram

Notable aspects of the design include:

-• small and consistent: the library is less than 3500 LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
+• small and consistent: the library is less than 6k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic heartbeat and deferred freeing (for bounded worst-case times with reference counting).
  • free list sharding: the big idea: instead of one big free list (per size class) we have many smaller lists per memory "page" which both reduces fragmentation and increases locality – things that are allocated close in time get allocated close in memory. (A memory "page" in mimalloc contains blocks of one size class and is usually 64KiB on a 64-bit system).
  • eager page reset: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") reducing (real) memory pressure and fragmentation, especially in long running programs.
• secure: mimalloc can be built in secure mode, adding guard pages, randomized allocation, encrypted free lists, etc. to protect against various heap vulnerabilities. The performance penalty is only around 3% on average over our benchmarks.
diff --git a/docs/mimalloc-doc_8h_source.html b/docs/mimalloc-doc_8h_source.html
index 3a235533..f70ae81f 100644
--- a/docs/mimalloc-doc_8h_source.html
+++ b/docs/mimalloc-doc_8h_source.html
@@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
    @@ -102,35 +102,33 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
    mimalloc-doc.h
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(mi_output_fun* out);
304 
306 void mi_stats_reset(void);
307 
309 void mi_stats_merge(void);
310 
314 void mi_thread_init(void);
315 
320 void mi_thread_done(void);
321 
327 
333 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat);
334 
350 
355 typedef void (mi_output_fun)(const char* msg);
356 
362 void mi_register_output(mi_output_fun* out) mi_attr_noexcept;
363 
368 bool mi_is_in_heap_region(const void* p);
369 
381 int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved);
382 
387 bool mi_is_redirected();
388 
389 
391 
392 // ------------------------------------------------------
393 // Aligned allocation
394 // ------------------------------------------------------
395 
401 
414 void* mi_malloc_aligned(size_t size, size_t alignment);
415 void* mi_zalloc_aligned(size_t size, size_t alignment);
416 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
417 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
418 
429 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
430 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
431 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
432 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
433 
435 
441 
446 struct mi_heap_s;
447 
452 typedef struct mi_heap_s mi_heap_t;
453 
456 
464 void mi_heap_delete(mi_heap_t* heap);
465 
473 void mi_heap_destroy(mi_heap_t* heap);
474 
479 
483 
490 
492 void mi_heap_collect(mi_heap_t* heap, bool force);
493 
496 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
497 
501 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
502 
505 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
506 
509 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
510 
513 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
514 
517 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
518 
521 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
522 
525 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
526 
527 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
528 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
529 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
530 
531 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
532 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
533 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
534 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
535 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
536 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
537 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
538 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
539 
541 
542 
551 
552 void* mi_rezalloc(void* p, size_t newsize);
553 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
554 
555 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
556 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
557 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
558 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
559 
560 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
561 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
562 
563 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
564 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
565 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
566 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
567 
569 
575 
587 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
588 
590 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
591 
593 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
594 
596 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
597 
599 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
600 
602 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
603 
605 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
606 
608 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
609 
611 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
612 
614 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
615 
617 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
618 
620 
626 
633 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
634 
643 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
644 
652 bool mi_check_owned(const void* p);
653 
656 typedef struct mi_heap_area_s {
657  void* blocks;
658  size_t reserved;
659  size_t committed;
660  size_t used;
661  size_t block_size;
663 
671 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
672 
684 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
685 
687 
693 
695 typedef enum mi_option_e {
696  // stable options
700  // the following options are experimental
713 } mi_option_t;
714 
715 
716 bool mi_option_enabled(mi_option_t option);
717 void mi_option_enable(mi_option_t option, bool enable);
718 void mi_option_enable_default(mi_option_t option, bool enable);
719 
720 long mi_option_get(mi_option_t option);
721 void mi_option_set(mi_option_t option, long value);
722 void mi_option_set_default(mi_option_t option, long value);
723 
724 
726 
733 
734 void* mi_recalloc(void* p, size_t count, size_t size);
735 size_t mi_malloc_size(const void* p);
736 size_t mi_malloc_usable_size(const void *p);
737 
739 void mi_cfree(void* p);
740 
741 int mi_posix_memalign(void** p, size_t alignment, size_t size);
742 int mi__posix_memalign(void** p, size_t alignment, size_t size);
743 void* mi_memalign(size_t alignment, size_t size);
744 void* mi_valloc(size_t size);
745 
746 void* mi_pvalloc(size_t size);
747 void* mi_aligned_alloc(size_t alignment, size_t size);
748 void* mi_reallocarray(void* p, size_t count, size_t size);
749 
750 void mi_free_size(void* p, size_t size);
751 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
752 void mi_free_aligned(void* p, size_t alignment);
753 
755 void* mi_new(std::size_t n) noexcept(false);
756 
758 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
759 
761 void* mi_new_nothrow(size_t n);
762 
764 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
765 
767 
void mi_option_enable_default(mi_option_t option, bool enable)
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 #error "documentation file only!"
9 
10 
81 
85 
89 void mi_free(void* p);
90 
95 void* mi_malloc(size_t size);
96 
101 void* mi_zalloc(size_t size);
102 
112 void* mi_calloc(size_t count, size_t size);
113 
126 void* mi_realloc(void* p, size_t newsize);
127 
138 void* mi_recalloc(void* p, size_t count, size_t size);
139 
153 void* mi_expand(void* p, size_t newsize);
154 
164 void* mi_mallocn(size_t count, size_t size);
165 
175 void* mi_reallocn(void* p, size_t count, size_t size);
176 
193 void* mi_reallocf(void* p, size_t newsize);
194 
195 
204 char* mi_strdup(const char* s);
205 
215 char* mi_strndup(const char* s, size_t n);
216 
229 char* mi_realpath(const char* fname, char* resolved_name);
230 
232 
233 // ------------------------------------------------------
234 // Extended functionality
235 // ------------------------------------------------------
236 
240 
243 #define MI_SMALL_SIZE_MAX (128*sizeof(void*))
244 
252 void* mi_malloc_small(size_t size);
253 
261 void* mi_zalloc_small(size_t size);
262 
277 size_t mi_usable_size(void* p);
278 
288 size_t mi_good_size(size_t size);
289 
297 void mi_collect(bool force);
298 
303 void mi_stats_print(void* out);
304 
310 void mi_stats_print(mi_output_fun* out, void* arg);
311 
313 void mi_stats_reset(void);
314 
316 void mi_stats_merge(void);
317 
321 void mi_thread_init(void);
322 
327 void mi_thread_done(void);
328 
334 void mi_thread_stats_print_out(mi_output_fun* out, void* arg);
335 
342 typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
343 
359 void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg);
360 
366 typedef void (mi_output_fun)(const char* msg, void* arg);
367 
374 void mi_register_output(mi_output_fun* out, void* arg);
375 
381 typedef void (mi_error_fun)(int err, void* arg);
382 
398 void mi_register_error(mi_error_fun* errfun, void* arg);
399 
404 bool mi_is_in_heap_region(const void* p);
405 
406 
419 int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs);
420 
433 int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs);
434 
435 
440 bool mi_is_redirected();
441 
442 
444 
445 // ------------------------------------------------------
446 // Aligned allocation
447 // ------------------------------------------------------
448 
454 
467 void* mi_malloc_aligned(size_t size, size_t alignment);
468 void* mi_zalloc_aligned(size_t size, size_t alignment);
469 void* mi_calloc_aligned(size_t count, size_t size, size_t alignment);
470 void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment);
471 
482 void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset);
483 void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset);
484 void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset);
485 void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
486 
488 
494 
499 struct mi_heap_s;
500 
505 typedef struct mi_heap_s mi_heap_t;
506 
509 
517 void mi_heap_delete(mi_heap_t* heap);
518 
526 void mi_heap_destroy(mi_heap_t* heap);
527 
532 
536 
543 
545 void mi_heap_collect(mi_heap_t* heap, bool force);
546 
549 void* mi_heap_malloc(mi_heap_t* heap, size_t size);
550 
554 void* mi_heap_malloc_small(mi_heap_t* heap, size_t size);
555 
558 void* mi_heap_zalloc(mi_heap_t* heap, size_t size);
559 
562 void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size);
563 
566 void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size);
567 
570 char* mi_heap_strdup(mi_heap_t* heap, const char* s);
571 
574 char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n);
575 
578 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name);
579 
580 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize);
581 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size);
582 void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize);
583 
584 void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
585 void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
586 void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment);
587 void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset);
588 void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment);
589 void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset);
590 void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
591 void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
592 
594 
595 
604 
605 void* mi_rezalloc(void* p, size_t newsize);
606 void* mi_recalloc(void* p, size_t newcount, size_t size) ;
607 
608 void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment);
609 void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset);
610 void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment);
611 void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
612 
613 void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize);
614 void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size);
615 
616 void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment);
617 void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset);
618 void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment);
619 void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset);
620 
622 
628 
640 #define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
641 
643 #define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
644 
646 #define mi_calloc_tp(tp,count) ((tp*)mi_calloc(count,sizeof(tp)))
647 
649 #define mi_mallocn_tp(tp,count) ((tp*)mi_mallocn(count,sizeof(tp)))
650 
652 #define mi_reallocn_tp(p,tp,count) ((tp*)mi_reallocn(p,count,sizeof(tp)))
653 
655 #define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
656 
658 #define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
659 
661 #define mi_heap_calloc_tp(hp,tp,count) ((tp*)mi_heap_calloc(hp,count,sizeof(tp)))
662 
664 #define mi_heap_mallocn_tp(hp,tp,count) ((tp*)mi_heap_mallocn(hp,count,sizeof(tp)))
665 
667 #define mi_heap_reallocn_tp(hp,p,tp,count) ((tp*)mi_heap_reallocn(p,count,sizeof(tp)))
668 
670 #define mi_heap_recalloc_tp(hp,p,tp,count) ((tp*)mi_heap_recalloc(p,count,sizeof(tp)))
671 
673 
679 
686 bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
687 
696 bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
697 
705 bool mi_check_owned(const void* p);
706 
709 typedef struct mi_heap_area_s {
710  void* blocks;
711  size_t reserved;
712  size_t committed;
713  size_t used;
714  size_t block_size;
716 
724 typedef bool (mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
725 
737 bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
738 
740 
746 
748 typedef enum mi_option_e {
749  // stable options
753  // the following options are experimental
767 } mi_option_t;
768 
769 
770 bool mi_option_enabled(mi_option_t option);
771 void mi_option_enable(mi_option_t option, bool enable);
772 void mi_option_enable_default(mi_option_t option, bool enable);
773 
774 long mi_option_get(mi_option_t option);
775 void mi_option_set(mi_option_t option, long value);
776 void mi_option_set_default(mi_option_t option, long value);
777 
778 
780 
787 
788 void* mi_recalloc(void* p, size_t count, size_t size);
789 size_t mi_malloc_size(const void* p);
790 size_t mi_malloc_usable_size(const void *p);
791 
793 void mi_cfree(void* p);
794 
795 int mi_posix_memalign(void** p, size_t alignment, size_t size);
796 int mi__posix_memalign(void** p, size_t alignment, size_t size);
797 void* mi_memalign(size_t alignment, size_t size);
798 void* mi_valloc(size_t size);
799 
800 void* mi_pvalloc(size_t size);
801 void* mi_aligned_alloc(size_t alignment, size_t size);
802 void* mi_reallocarray(void* p, size_t count, size_t size);
803 
804 void mi_free_size(void* p, size_t size);
805 void mi_free_size_aligned(void* p, size_t size, size_t alignment);
806 void mi_free_aligned(void* p, size_t alignment);
807 
809 
822 
824 void* mi_new(std::size_t n) noexcept(false);
825 
827 void* mi_new_n(size_t count, size_t size) noexcept(false);
828 
830 void* mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false);
831 
833 void* mi_new_nothrow(size_t n);
834 
836 void* mi_new_aligned_nothrow(size_t n, size_t alignment);
837 
839 void* mi_new_realloc(void* p, size_t newsize);
840 
842 void* mi_new_reallocn(void* p, size_t newcount, size_t size);
843 
851 template<class T> struct mi_stl_allocator { }
852 
854 
void mi_option_enable_default(mi_option_t option, bool enable)
size_t mi_usable_size(void *p)
Return the available bytes in a memory block.
+
void * mi_new_nothrow(size_t n)
like mi_malloc, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_reallocn(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes.
void * mi_malloc_aligned(size_t size, size_t alignment)
Allocate size bytes aligned by alignment.
void * mi_recalloc_aligned_at(void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
void mi_stats_print(mi_output_fun *out)
Print the main statistics.
void mi_stats_reset(void)
Reset statistics.
void * mi_heap_realloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
void * mi_new_realloc(void *p, size_t newsize)
like mi_realloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void * mi_recalloc(void *p, size_t count, size_t size)
Re-allocate memory to count elements of size bytes, with extra memory initialized to zero.
void * mi_mallocn(size_t count, size_t size)
Allocate count elements of size bytes.
size_t mi_malloc_size(const void *p)
-
Reset segment memory when a segment is cached.
Definition: mimalloc-doc.h:707
int mi_posix_memalign(void **p, size_t alignment, size_t size)
void mi_stats_merge(void)
Merge thread local statistics with the main statistics and reset.
-
void() mi_output_fun(const char *msg)
Type of output functions.
Definition: mimalloc-doc.h:355
-
void mi_register_output(mi_output_fun *out) mi_attr_noexcept
Register an output function.
+
void * mi_new_n(size_t count, size_t size) noexcept(false)
like mi_mallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exceptio...
void mi_option_set_default(mi_option_t option, long value)
-
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
raise std::bad_alloc exception on failure.
+
void() mi_error_fun(int err, void *arg)
Type of error callback functions.
Definition: mimalloc-doc.h:381
void * mi_rezalloc(void *p, size_t newsize)
-
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:701
+
Eagerly commit segments (4MiB) (enabled by default).
Definition: mimalloc-doc.h:754
void * mi_heap_zalloc(mi_heap_t *heap, size_t size)
Allocate zero-initialized in a specific heap.
void mi_option_set(mi_option_t option, long value)
-
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free)
Register a deferred free function.
-
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:702
+
Eagerly commit large (256MiB) memory regions (enabled by default, except on Windows)
Definition: mimalloc-doc.h:755
void mi_cfree(void *p)
Just as free but also checks if the pointer p belongs to our heap.
void * mi_recalloc_aligned(void *p, size_t newcount, size_t size, size_t alignment)
-
Definition: mimalloc-doc.h:712
+
Definition: mimalloc-doc.h:766
void * mi_realloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:657
+
void * blocks
start of the area containing heap blocks
Definition: mimalloc-doc.h:710
void * mi_realloc_aligned(void *p, size_t newsize, size_t alignment)
int mi__posix_memalign(void **p, size_t alignment, size_t size)
void mi_free(void *p)
Free previously allocated memory.
@@ -146,72 +144,80 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
void * mi_heap_rezalloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_zalloc(size_t size)
Allocate zero-initialized size bytes.
void * mi_heap_rezalloc(mi_heap_t *heap, void *p, size_t newsize)
-
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:705
+
The number of segments per thread to keep cached.
Definition: mimalloc-doc.h:758
void * mi_heap_calloc(mi_heap_t *heap, size_t count, size_t size)
Allocate count zero-initialized elements in a specific heap.
-
void * mi_new(std::size_t n) noexcept(false)
raise std::bad_alloc exception on failure.
void * mi_heap_calloc_aligned(mi_heap_t *heap, size_t count, size_t size, size_t alignment)
bool mi_is_redirected()
Is the C runtime malloc API redirected?
-
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:661
+
size_t block_size
size in bytes of one block
Definition: mimalloc-doc.h:714
void * mi_reallocarray(void *p, size_t count, size_t size)
+
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) evenly divided over numa_nodes nodes, but stops after at most t...
+
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat, void *arg)
Type of deferred free functions.
Definition: mimalloc-doc.h:342
bool mi_is_in_heap_region(const void *p)
Is a pointer part of our heap?
void mi_option_enable(mi_option_t option, bool enable)
+
void * mi_new_aligned(std::size_t n, std::align_val_t alignment) noexcept(false)
like mi_malloc_aligned(), but when out of memory, use std::get_new_handler and raise std::bad_alloc e...
void * mi_realloc(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
-
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:704
-
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t *pages_reserved)
Reserve pages of huge OS pages (1GiB) but stops after at most max_secs seconds.
+
The number of huge OS pages (1GiB in size) to reserve at the start of the program.
Definition: mimalloc-doc.h:757
void * mi_heap_reallocf(mi_heap_t *heap, void *p, size_t newsize)
void mi_free_size_aligned(void *p, size_t size, size_t alignment)
void * mi_rezalloc_aligned_at(void *p, size_t newsize, size_t alignment, size_t offset)
-
Reset page memory when it becomes free.
Definition: mimalloc-doc.h:706
+
Reset page memory after mi_option_reset_delay milliseconds when it becomes free.
Definition: mimalloc-doc.h:759
void mi_thread_done(void)
Uninitialize mimalloc on a thread.
bool mi_heap_visit_blocks(const mi_heap_t *heap, bool visit_all_blocks, mi_block_visit_fun *visitor, void *arg)
Visit all areas and blocks in a heap.
-
void mi_thread_stats_print(mi_output_fun *out)
Print out heap statistics for this thread.
+
Pretend there are at most N NUMA nodes.
Definition: mimalloc-doc.h:762
void * mi_malloc(size_t size)
Allocate size bytes.
bool mi_option_enabled(mi_option_t option)
-
Experimental.
Definition: mimalloc-doc.h:708
+
void mi_register_error(mi_error_fun *errfun, void *arg)
Register an error callback function.
+
Experimental.
Definition: mimalloc-doc.h:763
char * mi_heap_strndup(mi_heap_t *heap, const char *s, size_t n)
Duplicate a string of at most length n in a specific heap.
-
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:671
+
bool() mi_block_visit_fun(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *arg)
Visitor function passed to mi_heap_visit_blocks()
Definition: mimalloc-doc.h:724
void * mi_heap_recalloc(mi_heap_t *heap, void *p, size_t newcount, size_t size)
void * mi_heap_malloc_aligned_at(mi_heap_t *heap, size_t size, size_t alignment, size_t offset)
char * mi_realpath(const char *fname, char *resolved_name)
Resolve a file path name.
-
Print error messages to stderr.
Definition: mimalloc-doc.h:698
-
Experimental.
Definition: mimalloc-doc.h:710
+
Print error messages to stderr.
Definition: mimalloc-doc.h:751
+
Experimental.
Definition: mimalloc-doc.h:760
void * mi_heap_rezalloc_aligned(mi_heap_t *heap, void *p, size_t newsize, size_t alignment)
+
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
like mi_malloc_aligned, but when out of memory, use std::get_new_handler but return NULL on failure.
void * mi_memalign(size_t alignment, size_t size)
-
void * mi_new_aligned_nothrow(size_t n, size_t alignment)
return NULL on failure.
-
void * mi_new_nothrow(size_t n)
return NULL on failure.
void * mi_rezalloc_aligned(void *p, size_t newsize, size_t alignment)
bool mi_heap_contains_block(mi_heap_t *heap, const void *p)
Does a heap contain a pointer to a previously allocated block?
void mi_heap_collect(mi_heap_t *heap, bool force)
Release outstanding resources in a specific heap.
void * mi_heap_recalloc_aligned_at(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment, size_t offset)
-
Print verbose messages to stderr.
Definition: mimalloc-doc.h:699
+
Print verbose messages to stderr.
Definition: mimalloc-doc.h:752
void * mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset)
void * mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset)
Allocate size bytes aligned by alignment at a specified offset.
void mi_heap_delete(mi_heap_t *heap)
Delete a previously allocated heap.
-
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:711
+
OS tag to assign to mimalloc'd memory.
Definition: mimalloc-doc.h:765
mi_heap_t * mi_heap_get_default()
Get the default heap that is used for mi_malloc() et al.
+
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs)
Reserve pages of huge OS pages (1GiB) at a specific numa_node, but stops after at most timeout_msecs ...
void * mi_aligned_alloc(size_t alignment, size_t size)
void * mi_valloc(size_t size)
void mi_thread_init(void)
Initialize mimalloc on a thread.
size_t mi_good_size(size_t size)
Return the used allocation size.
-
Experimental.
Definition: mimalloc-doc.h:709
+
void mi_stats_print(void *out)
Print the main statistics.
+
Experimental.
Definition: mimalloc-doc.h:764
void * mi_heap_recalloc_aligned(mi_heap_t *heap, void *p, size_t newcount, size_t size, size_t alignment)
void * mi_heap_mallocn(mi_heap_t *heap, size_t count, size_t size)
Allocate count elements in a specific heap.
-
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:656
-
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:697
+
An area of heap space contains blocks of a single size.
Definition: mimalloc-doc.h:709
+
void mi_thread_stats_print_out(mi_output_fun *out, void *arg)
Print out heap statistics for this thread.
+
Print statistics to stderr when the program is done.
Definition: mimalloc-doc.h:750
void * mi_zalloc_aligned(size_t size, size_t alignment)
-
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:658
-
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:452
-
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:660
-
void() mi_deferred_free_fun(bool force, unsigned long long heartbeat)
Type of deferred free functions.
Definition: mimalloc-doc.h:333
+
size_t reserved
bytes reserved for this area
Definition: mimalloc-doc.h:711
+
struct mi_heap_s mi_heap_t
Type of first-class heaps.
Definition: mimalloc-doc.h:505
+
size_t used
bytes in use by allocated blocks
Definition: mimalloc-doc.h:713
+
void mi_register_deferred_free(mi_deferred_free_fun *deferred_free, void *arg)
Register a deferred free function.
void mi_free_size(void *p, size_t size)
void mi_collect(bool force)
Eagerly free memory.
+
void * mi_new_reallocn(void *p, size_t newcount, size_t size)
like mi_reallocn(), but when out of memory, use std::get_new_handler and raise std::bad_alloc excepti...
void mi_heap_destroy(mi_heap_t *heap)
Destroy a heap, freeing all its still allocated blocks.
void * mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset)
-
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:703
+
Use large OS pages (2MiB in size) if possible.
Definition: mimalloc-doc.h:756
void * mi_heap_reallocn(mi_heap_t *heap, void *p, size_t count, size_t size)
+
void mi_register_output(mi_output_fun *out, void *arg)
Register an output function.
+
std::allocator implementation for mimalloc for use in STL containers.
Definition: mimalloc-doc.h:851
void * mi_heap_malloc_small(mi_heap_t *heap, size_t size)
Allocate a small object in a specific heap.
void * mi_heap_realloc(mi_heap_t *heap, void *p, size_t newsize)
size_t mi_malloc_usable_size(const void *p)
+
void() mi_output_fun(const char *msg, void *arg)
Type of output functions.
Definition: mimalloc-doc.h:366
char * mi_strdup(const char *s)
Allocate and duplicate a string.
void * mi_heap_realloc_aligned_at(mi_heap_t *heap, void *p, size_t newsize, size_t alignment, size_t offset)
void * mi_reallocf(void *p, size_t newsize)
Re-allocate memory to newsize bytes.
@@ -223,10 +229,12 @@ $(document).ready(function(){initNavTree('mimalloc-doc_8h_source.html','');});
long mi_option_get(mi_option_t option)
mi_heap_t * mi_heap_get_backing()
Get the backing heap.
void mi_free_aligned(void *p, size_t alignment)
+
void * mi_new(std::size_t n) noexcept(false)
like mi_malloc(), but when out of memory, use std::get_new_handler and raise std::bad_alloc exception...
+
Delay in milli-seconds before resetting a page (100ms by default)
Definition: mimalloc-doc.h:761
mi_heap_t * mi_heap_new()
Create a new heap that can be used for allocation.
void * mi_heap_malloc(mi_heap_t *heap, size_t size)
Allocate in a specific heap.
-
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:659
-
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:695
+
size_t committed
current committed bytes of this area
Definition: mimalloc-doc.h:712
+
mi_option_t
Runtime options.
Definition: mimalloc-doc.h:748
bool mi_heap_check_owned(mi_heap_t *heap, const void *p)
Check safely if any pointer is part of a heap.
mi_heap_t * mi_heap_set_default(mi_heap_t *heap)
Set the default heap to use for mi_malloc() et al.
diff --git a/docs/modules.html b/docs/modules.html index ca18e1eb..91bf17e8 100644 --- a/docs/modules.html +++ b/docs/modules.html @@ -37,7 +37,7 @@ Logo
mi-malloc
-  1.0
+  1.4
@@ -113,6 +113,7 @@ $(document).ready(function(){initNavTree('modules.html','');});
 Heap Introspection  Inspect the heap at runtime
 Runtime Options  Set runtime behavior
 Posix  mi_ prefixed implementations of various Posix, Unix, and C++ allocation functions
+C++ wrappers  mi_ prefixed implementations of various allocation functions that use C++ semantics on out-of-memory, generally calling std::get_new_handler and raising a std::bad_alloc exception on failure
diff --git a/docs/modules.js b/docs/modules.js
index 47e99b42..b2c2a224 100644
--- a/docs/modules.js
+++ b/docs/modules.js
@@ -8,5 +8,6 @@ var modules =
   [ "Typed Macros", "group__typed.html", "group__typed" ],
   [ "Heap Introspection", "group__analysis.html", "group__analysis" ],
   [ "Runtime Options", "group__options.html", "group__options" ],
-  [ "Posix", "group__posix.html", "group__posix" ]
+  [ "Posix", "group__posix.html", "group__posix" ],
+  [ "C++ wrappers", "group__cpp.html", "group__cpp" ]
 ];
\ No newline at end of file
diff --git a/docs/navtreeindex0.js b/docs/navtreeindex0.js
index 90be7d78..047d6dbc 100644
--- a/docs/navtreeindex0.js
+++ b/docs/navtreeindex0.js
@@ -28,26 +28,39 @@ var NAVTREEINDEX0 =
 "group__analysis.html#gaa862aa8ed8d57d84cae41fc1022d71af":[5,6,4],
 "group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65":[5,6,1],
 "group__analysis.html#structmi__heap__area__t":[5,6,0],
+"group__cpp.html":[5,9],
+"group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907":[5,9,7],
+"group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e":[5,9,6],
+"group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,9,1],
+"group__cpp.html#gab5e29558926d934c3f1cae8c815f942c":[5,9,3],
+"group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81":[5,9,4],
+"group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,9,5],
+"group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,9,2],
+"group__cpp.html#structmi__stl__allocator":[5,9,0],
 "group__extended.html":[5,1],
-"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,17],
-"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,14],
 "group__extended.html#ga1ea64283508718d9d645c38efc2f4305":[5,1,0],
-"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,18],
-"group__extended.html#ga22213691c3ce5ab4d91b24aff1023529":[5,1,1],
-"group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2":[5,1,8],
-"group__extended.html#ga2664f36a2dd557741c429cb799f04641":[5,1,10],
-"group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f":[5,1,2],
-"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,13],
-"group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,3],
-"group__extended.html#ga489670a15d1a257ab4639e645ee4612a":[5,1,16],
-"group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,5],
-"group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,7],
-"group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5":[5,1,9],
-"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,11],
-"group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01":[5,1,12],
-"group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,6],
-"group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,4],
-"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,15],
+"group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee":[5,1,21],
+"group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf":[5,1,18],
+"group__extended.html#ga220f29f40a44404b0061c15bc1c31152":[5,1,22],
+"group__extended.html#ga251d369cda3f1c2a955c555486ed90e5":[5,1,2],
+"group__extended.html#ga256cc6f13a142deabbadd954a217e228":[5,1,16],
+"group__extended.html#ga299dae78d25ce112e384a98b7309c5be":[5,1,1],
+"group__extended.html#ga2d126e5c62d3badc35445e5d84166df2":[5,1,15],
+"group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50":[5,1,13],
+"group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece":[5,1,9],
+"group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99":[5,1,17],
+"group__extended.html#ga421430e2226d7d468529cec457396756":[5,1,4],
+"group__extended.html#ga5f071b10d4df1c3658e04e7fd67a94e6":[5,1,6],
+"group__extended.html#ga7136c2e55cb22c98ecf95d08d6debb99":[5,1,8],
+"group__extended.html#ga7795a13d20087447281858d2c771cca1":[5,1,12],
+"group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1":[5,1,14],
+"group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45":[5,1,10],
+"group__extended.html#gaad25050b19f30cd79397b227e0157a3f":[5,1,7],
+"group__extended.html#gab1dac8476c46cb9eecab767eb40c1525":[5,1,20],
+"group__extended.html#gac057927cd06c854b45fe7847e921bd47":[5,1,5],
+"group__extended.html#gad823d23444a4b77a40f66bf075a98a0c":[5,1,3],
+"group__extended.html#gae5b17ff027cd2150b43a33040250cf3f":[5,1,11],
+"group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17":[5,1,19],
 "group__heap.html":[5,3],
 "group__heap.html#ga00e95ba1e01acac3cfd95bb7a357a6f0":[5,3,20],
 "group__heap.html#ga08ca6419a5c057a4d965868998eef487":[5,3,3],
@@ -99,38 +112,35 @@ var NAVTREEINDEX0 =
 "group__options.html#gaf84921c32375e25754dc2ee6a911fa60":[5,7,5],
 "group__options.html#gafebf7ed116adb38ae5218bc3ce06884c":[5,7,0],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda":[5,7,0,0],
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,11],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74":[5,7,0,11],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5":[5,7,0,10],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c":[5,7,0,13],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b":[5,7,0,3],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1":[5,7,0,7],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad":[5,7,0,4],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4192d491200d0055df0554d4cf65054e":[5,7,0,5],
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf":[5,7,0,13],
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a":[5,7,0,14],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca4b74ae2a69e445de6c2361b73c1d14bf":[5,7,0,14],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca5b4357b74be0d87568036c32eb1a2e4a":[5,7,0,15],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777":[5,7,0,2],
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07":[5,7,0,9],
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536":[5,7,0,10],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536":[5,7,0,12],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2":[5,7,0,6],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968":[5,7,0,8],
-"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,12],
+"group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d":[5,7,0,9],
 "group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0":[5,7,0,1],
 "group__posix.html":[5,8],
 "group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17":[5,8,7],
 "group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9":[5,8,3],
 "group__posix.html#ga1326d2e4388630b5f81ca7206318b8e5":[5,8,1],
 "group__posix.html#ga4531c9e775bb3ae12db57c1ba8a5d7de":[5,8,6],
-"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,15],
+"group__posix.html#ga48fad8648a2f1dab9c87ea9448a52088":[5,8,11],
 "group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7":[5,8,2],
 "group__posix.html#ga72e9d7ffb5fe94d69bc722c8506e27bc":[5,8,5],
-"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,16],
+"group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b":[5,8,12],
 "group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e":[5,8,8],
-"group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545":[5,8,9],
-"group__posix.html#gab5e29558926d934c3f1cae8c815f942c":[5,8,11],
-"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,13],
+"group__posix.html#gacff84f226ba9feb2031b8992e5579447":[5,8,9],
 "group__posix.html#gad5a69c8fea96aa2b7a7c818c2130090a":[5,8,0],
 "group__posix.html#gae01389eedab8d67341ff52e2aad80ebb":[5,8,4],
-"group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a":[5,8,12],
-"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,14],
-"group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3":[5,8,10],
+"group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e":[5,8,10],
 "group__typed.html":[5,5],
 "group__typed.html#ga0619a62c5fd886f1016030abe91f0557":[5,5,7],
 "group__typed.html#ga1158b49a55dfa81f58a4426a7578f523":[5,5,9],
diff --git a/docs/overrides.html b/docs/overrides.html
index 74ef9dbd..3b5d9bd3 100644
--- a/docs/overrides.html
+++ b/docs/overrides.html
@@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -118,10 +118,10 @@ $(document).ready(function(){initNavTree('overrides.html','');});

Note that certain security restrictions may apply when doing this from the shell.

Note: unfortunately, at this time, dynamic overriding on macOS seems broken, but it is actively being worked on (see issue #50).

Windows

-

On Windows you need to link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Moreover, you need to ensure the mimalloc-redirect.dll (or mimalloc-redirect32.dll) is available in the same folder as the mimalloc DLL at runtime (as it as referred to by the mimalloc DLL). The redirection DLL's ensure all calls to the C runtime malloc API get redirected to mimalloc.

-

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this.

-

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc successfully redirected.

-

(Note: in principle, it should be possible to patch existing executables that are linked with the dynamic C runtime (ucrtbase.dll) by just putting the mimalloc DLL into the import table (and putting mimalloc-redirect.dll in the same folder) Such patching can be done for example with CFF Explorer).

+

Overriding on Windows is robust but requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Moreover, you need to ensure the mimalloc-redirect.dll (or mimalloc-redirect32.dll) is available in the same folder as the main mimalloc-override.dll at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in mimalloc-override.dll).

+

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example of how to use this. For best performance on Windows with C++, it is highly recommended to also override the new/delete operations (by including mimalloc-new-delete.h in a single(!) source file in your project).
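A minimal sketch combining both recommendations (a hypothetical program; assumes it is linked against the mimalloc DLL with /MD as described above):

#include <mimalloc.h>
#include <mimalloc-new-delete.h>  // include this header in exactly one source file

int main() {
  mi_version();            // any mimalloc API call forces the DLL to load
  int* p = new int(42);    // new/delete now allocate through mimalloc
  delete p;
  return 0;
}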

+

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc was successfully redirected.

+

(Note: in principle, it is possible to patch existing executables that are linked with the dynamic C runtime (ucrtbase.dll) by just putting the mimalloc-override.dll into the import table (and putting mimalloc-redirect.dll in the same folder). Such patching can be done, for example, with CFF Explorer.)

Static override

On Unix systems, you can also statically link with mimalloc to override the standard malloc interface. The recommended way is to link the final program with the mimalloc single object file (mimalloc-override.o). We use an object file rather than a library file since linkers give preference to object files over archives when resolving symbols. To ensure that the standard malloc interface resolves to the mimalloc library, link it as the first object file. For example:

gcc -o myprogram mimalloc-override.o myfile1.c ...

List of Overrides:

diff --git a/docs/pages.html b/docs/pages.html index d0ee9f7a..ad5549bf 100644 --- a/docs/pages.html +++ b/docs/pages.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
diff --git a/docs/search/all_3.js b/docs/search/all_3.js index af76e9c8..2e08411f 100644 --- a/docs/search/all_3.js +++ b/docs/search/all_3.js @@ -1,4 +1,5 @@ var searchData= [ - ['committed',['committed',['../group__analysis.html#ab47526df656d8837ec3e97f11b83f835',1,'mi_heap_area_t']]] + ['committed',['committed',['../group__analysis.html#ab47526df656d8837ec3e97f11b83f835',1,'mi_heap_area_t']]], + ['c_2b_2b_20wrappers',['C++ wrappers',['../group__cpp.html',1,'']]] ]; diff --git a/docs/search/all_6.js b/docs/search/all_6.js index 2edb9986..c757cbbf 100644 --- a/docs/search/all_6.js +++ b/docs/search/all_6.js @@ -10,7 +10,8 @@ var searchData= ['mi_5fcfree',['mi_cfree',['../group__posix.html#ga705dc7a64bffacfeeb0141501a5c35d7',1,'mimalloc-doc.h']]], ['mi_5fcheck_5fowned',['mi_check_owned',['../group__analysis.html#ga628c237489c2679af84a4d0d143b3dd5',1,'mimalloc-doc.h']]], ['mi_5fcollect',['mi_collect',['../group__extended.html#ga421430e2226d7d468529cec457396756',1,'mimalloc-doc.h']]], - ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga22213691c3ce5ab4d91b24aff1023529',1,'mimalloc-doc.h']]], + ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], + ['mi_5ferror_5ffun',['mi_error_fun',['../group__extended.html#ga251d369cda3f1c2a955c555486ed90e5',1,'mimalloc-doc.h']]], ['mi_5fexpand',['mi_expand',['../group__malloc.html#gaaee66a1d483c3e28f585525fb96707e4',1,'mimalloc-doc.h']]], ['mi_5ffree',['mi_free',['../group__malloc.html#gaf2c7b89c327d1f60f59e68b9ea644d95',1,'mimalloc-doc.h']]], ['mi_5ffree_5faligned',['mi_free_aligned',['../group__posix.html#ga0d28d5cf61e6bfbb18c63092939fe5c9',1,'mimalloc-doc.h']]], @@ -72,11 +73,13 @@ var searchData= ['mi_5fmallocn',['mi_mallocn',['../group__malloc.html#ga0b05e2bf0f73e7401ae08597ff782ac6',1,'mimalloc-doc.h']]], ['mi_5fmallocn_5ftp',['mi_mallocn_tp',['../group__typed.html#gae5cb6e0fafc9f23169c5622e077afe8b',1,'mimalloc-doc.h']]], ['mi_5fmemalign',['mi_memalign',['../group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e',1,'mimalloc-doc.h']]], - ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], - ['mi_5foption_5fcache_5freset',['mi_option_cache_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07',1,'mimalloc-doc.h']]], + ['mi_5fnew',['mi_new',['../group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned',['mi_new_aligned',['../group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__cpp.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], + 
['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -89,6 +92,7 @@ var searchData= ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], ['mi_5foption_5freserve_5fhuge_5fos_5fpages',['mi_option_reserve_huge_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2',1,'mimalloc-doc.h']]], ['mi_5foption_5freset_5fdecommits',['mi_option_reset_decommits',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536',1,'mimalloc-doc.h']]], + ['mi_5foption_5freset_5fdelay',['mi_option_reset_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5fcache',['mi_option_segment_cache',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fset',['mi_option_set',['../group__options.html#gaf84921c32375e25754dc2ee6a911fa60',1,'mimalloc-doc.h']]], @@ -96,8 +100,9 @@ var searchData= ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], ['mi_5foption_5ft',['mi_option_t',['../group__options.html#gafebf7ed116adb38ae5218bc3ce06884c',1,'mimalloc-doc.h']]], + ['mi_5foption_5fuse_5fnuma_5fnodes',['mi_option_use_numa_nodes',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74',1,'mimalloc-doc.h']]], ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]], - ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f',1,'mimalloc-doc.h']]], + ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]], ['mi_5fposix_5fmemalign',['mi_posix_memalign',['../group__posix.html#gacff84f226ba9feb2031b8992e5579447',1,'mimalloc-doc.h']]], ['mi_5fpvalloc',['mi_pvalloc',['../group__posix.html#gaeb325c39b887d3b90d85d1eb1712fb1e',1,'mimalloc-doc.h']]], ['mi_5frealloc',['mi_realloc',['../group__malloc.html#gaf11eb497da57bdfb2de65eb191c69db6',1,'mimalloc-doc.h']]], @@ -111,21 +116,24 @@ var searchData= ['mi_5frecalloc',['mi_recalloc',['../group__malloc.html#ga23a0fbb452b5dce8e31fab1a1958cacc',1,'mimalloc-doc.h']]], 
['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], - ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2',1,'mimalloc-doc.h']]], - ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5',1,'mimalloc-doc.h']]], - ['mi_5freserve_5fhuge_5fos_5fpages',['mi_reserve_huge_os_pages',['../group__extended.html#ga2664f36a2dd557741c429cb799f04641',1,'mimalloc-doc.h']]], + ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5ferror',['mi_register_error',['../group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45',1,'mimalloc-doc.h']]], + ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], ['mi_5frezalloc',['mi_rezalloc',['../group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fsmall_5fsize_5fmax',['MI_SMALL_SIZE_MAX',['../group__extended.html#ga1ea64283508718d9d645c38efc2f4305',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], + ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], ['mi_5fthread_5finit',['mi_thread_init',['../group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17',1,'mimalloc-doc.h']]], - ['mi_5fthread_5fstats_5fprint',['mi_thread_stats_print',['../group__extended.html#ga489670a15d1a257ab4639e645ee4612a',1,'mimalloc-doc.h']]], + ['mi_5fthread_5fstats_5fprint_5fout',['mi_thread_stats_print_out',['../group__extended.html#gab1dac8476c46cb9eecab767eb40c1525',1,'mimalloc-doc.h']]], 
['mi_5fusable_5fsize',['mi_usable_size',['../group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee',1,'mimalloc-doc.h']]], ['mi_5fvalloc',['mi_valloc',['../group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b',1,'mimalloc-doc.h']]], ['mi_5fzalloc',['mi_zalloc',['../group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000',1,'mimalloc-doc.h']]], diff --git a/docs/search/all_c.html b/docs/search/all_c.html new file mode 100644 index 00000000..3de15867 --- /dev/null +++ b/docs/search/all_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/all_c.js b/docs/search/all_c.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/all_c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/all_d.html b/docs/search/all_d.html new file mode 100644 index 00000000..a2d5bd7e --- /dev/null +++ b/docs/search/all_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/all_d.js b/docs/search/all_d.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/all_d.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/classes_0.js b/docs/search/classes_0.js index 4c5482b9..0010dd97 100644 --- a/docs/search/classes_0.js +++ b/docs/search/classes_0.js @@ -1,4 +1,5 @@ var searchData= [ - ['mi_5fheap_5farea_5ft',['mi_heap_area_t',['../group__analysis.html#structmi__heap__area__t',1,'']]] + ['mi_5fheap_5farea_5ft',['mi_heap_area_t',['../group__analysis.html#structmi__heap__area__t',1,'']]], + ['mi_5fstl_5fallocator',['mi_stl_allocator',['../group__cpp.html#structmi__stl__allocator',1,'']]] ]; diff --git a/docs/search/enumvalues_1.js b/docs/search/enumvalues_1.js index 3ed91631..3b712708 100644 --- a/docs/search/enumvalues_1.js +++ b/docs/search/enumvalues_1.js @@ -1,6 +1,5 @@ var searchData= [ - ['mi_5foption_5fcache_5freset',['mi_option_cache_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac2157a0cb79cd996c1db7d9f6a090c07',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit',['mi_option_eager_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca1e8de72c93da7ff22d91e1e27b52ac2b',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fcommit_5fdelay',['mi_option_eager_commit_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca17a190c25be381142d87e0468c4c068c',1,'mimalloc-doc.h']]], ['mi_5foption_5feager_5fregion_5fcommit',['mi_option_eager_region_commit',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca32ce97ece29f69e82579679cf8a307ad',1,'mimalloc-doc.h']]], @@ -9,9 +8,11 @@ var searchData= ['mi_5foption_5fpage_5freset',['mi_option_page_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cada854dd272c66342f18a93ee254a2968',1,'mimalloc-doc.h']]], ['mi_5foption_5freserve_5fhuge_5fos_5fpages',['mi_option_reserve_huge_os_pages',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884caca7ed041be3b0b9d0b82432c7bf41af2',1,'mimalloc-doc.h']]], ['mi_5foption_5freset_5fdecommits',['mi_option_reset_decommits',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cac81ee965b130fa81238913a3c239d536',1,'mimalloc-doc.h']]], + ['mi_5foption_5freset_5fdelay',['mi_option_reset_delay',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca154fe170131d5212cff57e22b99523c5',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5fcache',['mi_option_segment_cache',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca2ecbe7ef32f5c84de3739aa4f0b805a1',1,'mimalloc-doc.h']]], ['mi_5foption_5fsegment_5freset',['mi_option_segment_reset',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafb121d30d87591850d5410ccc3a95c6d',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5ferrors',['mi_option_show_errors',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884cafbf4822e5c00732c5984b32a032837f0',1,'mimalloc-doc.h']]], ['mi_5foption_5fshow_5fstats',['mi_option_show_stats',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0957ef73b2550764b4840edf48422fda',1,'mimalloc-doc.h']]], + ['mi_5foption_5fuse_5fnuma_5fnodes',['mi_option_use_numa_nodes',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca0ac33a18f6b659fcfaf44efb0bab1b74',1,'mimalloc-doc.h']]], ['mi_5foption_5fverbose',['mi_option_verbose',['../group__options.html#ggafebf7ed116adb38ae5218bc3ce06884ca7c8b7bf5281c581bad64f5daa6442777',1,'mimalloc-doc.h']]] ]; 
diff --git a/docs/search/functions_0.js b/docs/search/functions_0.js index c5eeb540..6271797a 100644 --- a/docs/search/functions_0.js +++ b/docs/search/functions_0.js @@ -59,10 +59,13 @@ var searchData= ['mi_5fmalloc_5fusable_5fsize',['mi_malloc_usable_size',['../group__posix.html#ga06d07cf357bbac5c73ba5d0c0c421e17',1,'mimalloc-doc.h']]], ['mi_5fmallocn',['mi_mallocn',['../group__malloc.html#ga0b05e2bf0f73e7401ae08597ff782ac6',1,'mimalloc-doc.h']]], ['mi_5fmemalign',['mi_memalign',['../group__posix.html#gaab7fa71ea93b96873f5d9883db57d40e',1,'mimalloc-doc.h']]], - ['mi_5fnew',['mi_new',['../group__posix.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned',['mi_new_aligned',['../group__posix.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], - ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__posix.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], - ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__posix.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew',['mi_new',['../group__cpp.html#gaad048a9fce3d02c5909cd05c6ec24545',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned',['mi_new_aligned',['../group__cpp.html#gaef2c2bdb4f70857902d3c8903ac095f3',1,'mimalloc-doc.h']]], + ['mi_5fnew_5faligned_5fnothrow',['mi_new_aligned_nothrow',['../group__cpp.html#gab5e29558926d934c3f1cae8c815f942c',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fn',['mi_new_n',['../group__cpp.html#gae7bc4f56cd57ed3359060ff4f38bda81',1,'mimalloc-doc.h']]], + ['mi_5fnew_5fnothrow',['mi_new_nothrow',['../group__cpp.html#gaeaded64eda71ed6b1d569d3e723abc4a',1,'mimalloc-doc.h']]], + ['mi_5fnew_5frealloc',['mi_new_realloc',['../group__cpp.html#gaab78a32f55149e9fbf432d5288e38e1e',1,'mimalloc-doc.h']]], + ['mi_5fnew_5freallocn',['mi_new_reallocn',['../group__cpp.html#ga756f4b2bc6a7ecd0a90baea8e90c7907',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable',['mi_option_enable',['../group__options.html#ga6d45a20a3131f18bc351b69763b38ce4',1,'mimalloc-doc.h']]], ['mi_5foption_5fenable_5fdefault',['mi_option_enable_default',['../group__options.html#ga37988264b915a7db92530cc02d5494cb',1,'mimalloc-doc.h']]], ['mi_5foption_5fenabled',['mi_option_enabled',['../group__options.html#gacebe3f6d91b4a50b54eb84e2a1da1b30',1,'mimalloc-doc.h']]], @@ -81,20 +84,22 @@ var searchData= ['mi_5frecalloc',['mi_recalloc',['../group__malloc.html#ga23a0fbb452b5dce8e31fab1a1958cacc',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned',['mi_recalloc_aligned',['../group__zeroinit.html#ga3e7e5c291acf1c7fd7ffd9914a9f945f',1,'mimalloc-doc.h']]], ['mi_5frecalloc_5faligned_5fat',['mi_recalloc_aligned_at',['../group__zeroinit.html#ga4ff5e92ad73585418a072c9d059e5cf9',1,'mimalloc-doc.h']]], - ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga24dc9cc6fca8daa2aa30aa8025467ce2',1,'mimalloc-doc.h']]], - ['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#ga84a0c8b401e42eb5b1bce156852f44c5',1,'mimalloc-doc.h']]], - ['mi_5freserve_5fhuge_5fos_5fpages',['mi_reserve_huge_os_pages',['../group__extended.html#ga2664f36a2dd557741c429cb799f04641',1,'mimalloc-doc.h']]], + ['mi_5fregister_5fdeferred_5ffree',['mi_register_deferred_free',['../group__extended.html#ga3460a6ca91af97be4058f523d3cb8ece',1,'mimalloc-doc.h']]], + ['mi_5fregister_5ferror',['mi_register_error',['../group__extended.html#gaa1d55e0e894be240827e5d87ec3a1f45',1,'mimalloc-doc.h']]], + 
['mi_5fregister_5foutput',['mi_register_output',['../group__extended.html#gae5b17ff027cd2150b43a33040250cf3f',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5fat',['mi_reserve_huge_os_pages_at',['../group__extended.html#ga7795a13d20087447281858d2c771cca1',1,'mimalloc-doc.h']]], + ['mi_5freserve_5fhuge_5fos_5fpages_5finterleave',['mi_reserve_huge_os_pages_interleave',['../group__extended.html#ga3132f521fb756fc0e8ec0b74fb58df50',1,'mimalloc-doc.h']]], ['mi_5frezalloc',['mi_rezalloc',['../group__zeroinit.html#ga8c292e142110229a2980b37ab036dbc6',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned',['mi_rezalloc_aligned',['../group__zeroinit.html#gacd71a7bce96aab38ae6de17af2eb2cf0',1,'mimalloc-doc.h']]], ['mi_5frezalloc_5faligned_5fat',['mi_rezalloc_aligned_at',['../group__zeroinit.html#gae8b358c417e61d5307da002702b0a8e1',1,'mimalloc-doc.h']]], ['mi_5fstats_5fmerge',['mi_stats_merge',['../group__extended.html#ga854b1de8cb067c7316286c28b2fcd3d1',1,'mimalloc-doc.h']]], - ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga8ca07ccff283956d71f48272f4fd5c01',1,'mimalloc-doc.h']]], + ['mi_5fstats_5fprint',['mi_stats_print',['../group__extended.html#ga2d126e5c62d3badc35445e5d84166df2',1,'mi_stats_print(void *out): mimalloc-doc.h'],['../group__extended.html#ga256cc6f13a142deabbadd954a217e228',1,'mi_stats_print(mi_output_fun *out, void *arg): mimalloc-doc.h']]], ['mi_5fstats_5freset',['mi_stats_reset',['../group__extended.html#ga3bb8468b8cfcc6e2a61d98aee85c5f99',1,'mimalloc-doc.h']]], ['mi_5fstrdup',['mi_strdup',['../group__malloc.html#gac7cffe13f1f458ed16789488bf92b9b2',1,'mimalloc-doc.h']]], ['mi_5fstrndup',['mi_strndup',['../group__malloc.html#gaaabf971c2571891433477e2d21a35266',1,'mimalloc-doc.h']]], ['mi_5fthread_5fdone',['mi_thread_done',['../group__extended.html#ga0ae4581e85453456a0d658b2b98bf7bf',1,'mimalloc-doc.h']]], ['mi_5fthread_5finit',['mi_thread_init',['../group__extended.html#gaf8e73efc2cbca9ebfdfb166983a04c17',1,'mimalloc-doc.h']]], - ['mi_5fthread_5fstats_5fprint',['mi_thread_stats_print',['../group__extended.html#ga489670a15d1a257ab4639e645ee4612a',1,'mimalloc-doc.h']]], + ['mi_5fthread_5fstats_5fprint_5fout',['mi_thread_stats_print_out',['../group__extended.html#gab1dac8476c46cb9eecab767eb40c1525',1,'mimalloc-doc.h']]], ['mi_5fusable_5fsize',['mi_usable_size',['../group__extended.html#ga089c859d9eddc5f9b4bd946cd53cebee',1,'mimalloc-doc.h']]], ['mi_5fvalloc',['mi_valloc',['../group__posix.html#ga73baaf5951f5165ba0763d0c06b6a93b',1,'mimalloc-doc.h']]], ['mi_5fzalloc',['mi_zalloc',['../group__malloc.html#gafdd9d8bb2986e668ba9884f28af38000',1,'mimalloc-doc.h']]], diff --git a/docs/search/functions_1.html b/docs/search/functions_1.html new file mode 100644 index 00000000..bfcf880b --- /dev/null +++ b/docs/search/functions_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/functions_1.js b/docs/search/functions_1.js new file mode 100644 index 00000000..06dbb19b --- /dev/null +++ b/docs/search/functions_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['void',['void',['../group__extended.html#gadc49452cc1634aa03ac83ffe9b97a19c',1,'mimalloc-doc.h']]] +]; diff --git a/docs/search/groups_2.js b/docs/search/groups_2.js index 68c73dbe..29185761 100644 --- a/docs/search/groups_2.js +++ b/docs/search/groups_2.js @@ -1,4 +1,4 @@ var searchData= [ - ['extended_20functions',['Extended Functions',['../group__extended.html',1,'']]] + ['c_2b_2b_20wrappers',['C++ wrappers',['../group__cpp.html',1,'']]] ]; diff --git a/docs/search/groups_3.js b/docs/search/groups_3.js index e7e40934..68c73dbe 100644 --- a/docs/search/groups_3.js +++ b/docs/search/groups_3.js @@ -1,5 +1,4 @@ var searchData= [ - ['heap_20introspection',['Heap Introspection',['../group__analysis.html',1,'']]], - ['heap_20allocation',['Heap Allocation',['../group__heap.html',1,'']]] + ['extended_20functions',['Extended Functions',['../group__extended.html',1,'']]] ]; diff --git a/docs/search/groups_4.js b/docs/search/groups_4.js index 4f005682..e7e40934 100644 --- a/docs/search/groups_4.js +++ b/docs/search/groups_4.js @@ -1,4 +1,5 @@ var searchData= [ - ['posix',['Posix',['../group__posix.html',1,'']]] + ['heap_20introspection',['Heap Introspection',['../group__analysis.html',1,'']]], + ['heap_20allocation',['Heap Allocation',['../group__heap.html',1,'']]] ]; diff --git a/docs/search/groups_5.js b/docs/search/groups_5.js index 2533cb94..4f005682 100644 --- a/docs/search/groups_5.js +++ b/docs/search/groups_5.js @@ -1,4 +1,4 @@ var searchData= [ - ['runtime_20options',['Runtime Options',['../group__options.html',1,'']]] + ['posix',['Posix',['../group__posix.html',1,'']]] ]; diff --git a/docs/search/groups_6.js b/docs/search/groups_6.js index 647887f5..2533cb94 100644 --- a/docs/search/groups_6.js +++ b/docs/search/groups_6.js @@ -1,4 +1,4 @@ var searchData= [ - ['typed_20macros',['Typed Macros',['../group__typed.html',1,'']]] + ['runtime_20options',['Runtime Options',['../group__options.html',1,'']]] ]; diff --git a/docs/search/groups_7.html b/docs/search/groups_7.html new file mode 100644 index 00000000..6a24e7cf --- /dev/null +++ b/docs/search/groups_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/groups_7.js b/docs/search/groups_7.js new file mode 100644 index 00000000..647887f5 --- /dev/null +++ b/docs/search/groups_7.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['typed_20macros',['Typed Macros',['../group__typed.html',1,'']]] +]; diff --git a/docs/search/groups_8.html b/docs/search/groups_8.html new file mode 100644 index 00000000..81ac9508 --- /dev/null +++ b/docs/search/groups_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/groups_8.js b/docs/search/groups_8.js new file mode 100644 index 00000000..2b9b4cea --- /dev/null +++ b/docs/search/groups_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['zero_20initialized_20re_2dallocation',['Zero initialized re-allocation',['../group__zeroinit.html',1,'']]] +]; diff --git a/docs/search/pages_4.html b/docs/search/pages_4.html new file mode 100644 index 00000000..021d277a --- /dev/null +++ b/docs/search/pages_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/pages_4.js b/docs/search/pages_4.js new file mode 100644 index 00000000..b47682a4 --- /dev/null +++ b/docs/search/pages_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['using_20the_20library',['Using the library',['../using.html',1,'']]] +]; diff --git a/docs/search/searchdata.js b/docs/search/searchdata.js index 919719e9..dd31068e 100644 --- a/docs/search/searchdata.js +++ b/docs/search/searchdata.js @@ -7,7 +7,7 @@ var indexSectionsWithContent = 4: "m", 5: "m", 6: "_m", - 7: "abehprtz", + 7: "abcehprtz", 8: "beopu" }; diff --git a/docs/search/typedefs_0.js b/docs/search/typedefs_0.js index c6f0f7ec..44a0a6c6 100644 --- a/docs/search/typedefs_0.js +++ b/docs/search/typedefs_0.js @@ -1,7 +1,8 @@ var searchData= [ ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], - ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga22213691c3ce5ab4d91b24aff1023529',1,'mimalloc-doc.h']]], + ['mi_5fdeferred_5ffree_5ffun',['mi_deferred_free_fun',['../group__extended.html#ga299dae78d25ce112e384a98b7309c5be',1,'mimalloc-doc.h']]], + ['mi_5ferror_5ffun',['mi_error_fun',['../group__extended.html#ga251d369cda3f1c2a955c555486ed90e5',1,'mimalloc-doc.h']]], ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]], - ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#ga2bed6d40b74591a67f81daea4b4a246f',1,'mimalloc-doc.h']]] + ['mi_5foutput_5ffun',['mi_output_fun',['../group__extended.html#gad823d23444a4b77a40f66bf075a98a0c',1,'mimalloc-doc.h']]] ]; diff --git a/docs/search/typedefs_1.html b/docs/search/typedefs_1.html new file mode 100644 index 00000000..c8a02685 --- /dev/null +++ b/docs/search/typedefs_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/typedefs_1.js b/docs/search/typedefs_1.js new file mode 100644 index 00000000..ecccb16a --- /dev/null +++ b/docs/search/typedefs_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['heartbeat',['heartbeat',['../group__extended.html#ga411f6e94394a2400aa460c796beff8d8',1,'mimalloc-doc.h']]] +]; diff --git a/docs/search/typedefs_2.html b/docs/search/typedefs_2.html new file mode 100644 index 00000000..86a91955 --- /dev/null +++ b/docs/search/typedefs_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/search/typedefs_2.js b/docs/search/typedefs_2.js new file mode 100644 index 00000000..2af06079 --- /dev/null +++ b/docs/search/typedefs_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['mi_5fblock_5fvisit_5ffun',['mi_block_visit_fun',['../group__analysis.html#gadfa01e2900f0e5d515ad5506b26f6d65',1,'mimalloc-doc.h']]], + ['mi_5fheap_5ft',['mi_heap_t',['../group__heap.html#ga34a47cde5a5b38c29f1aa3c5e76943c2',1,'mimalloc-doc.h']]] +]; diff --git a/docs/using.html b/docs/using.html index 9b7305b0..c5dc12e7 100644 --- a/docs/using.html +++ b/docs/using.html @@ -37,7 +37,7 @@ Logo
mi-malloc -  1.0 +  1.4
@@ -102,7 +102,11 @@ $(document).ready(function(){initNavTree('using.html','');});
Using the library
-

The preferred usage is including <mimalloc.h>, linking with the shared- or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

+

Build

+

The preferred usage is including <mimalloc.h>, linking with the shared or static library, and using the mi_malloc API exclusively for allocation. For example,

gcc -o myprogram -lmimalloc myfile.c

mimalloc uses only safe OS calls (mmap and VirtualAlloc) and can co-exist with other allocators linked to the same program. If you use cmake, you can simply use:

find_package(mimalloc 1.0 REQUIRED)

in your CMakeLists.txt to find a locally installed mimalloc. Then use either:

target_link_libraries(myapp PUBLIC mimalloc)

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test/CMakeLists.txt for an example.

+

C++

+

For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.

+

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());
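For completeness, the same idea as a small self-contained program (a sketch assuming a C++11 compiler and that mi_stl_allocator is declared by mimalloc.h when compiling as C++):

#include <mimalloc.h>
#include <vector>

struct some_struct { int value = 0; };

int main() {
  // All of the vector's storage is allocated through mimalloc.
  std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
  vec.push_back(some_struct());
  return 0;
}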

Statistics

You can pass environment variables to print verbose messages (MIMALLOC_VERBOSE=1) and statistics (MIMALLOC_SHOW_STATS=1) (in the debug version):

> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
175451865205073170563711388363 = 374456281610909315237213 * 468551
heap stats:     peak       total      freed     unit
normal   2:    16.4 kb    17.5 mb    17.5 mb    16 b   ok
normal   3:    16.3 kb    15.2 mb    15.2 mb    24 b   ok
normal   4:      64 b      4.6 kb     4.6 kb    32 b   ok
normal   5:      80 b    118.4 kb   118.4 kb    40 b   ok
normal   6:      48 b       48 b       48 b     48 b   ok
normal  17:     960 b      960 b      960 b    320 b   ok
heap stats:     peak       total      freed     unit
normal:        33.9 kb    32.8 mb    32.8 mb     1 b   ok
huge:             0 b        0 b        0 b      1 b   ok
total:         33.9 kb    32.8 mb    32.8 mb     1 b   ok
malloc requested:         32.8 mb
committed:     58.2 kb    58.2 kb    58.2 kb     1 b   ok
reserved:       2.0 mb     2.0 mb     2.0 mb     1 b   ok
reset:            0 b        0 b        0 b      1 b   ok
segments:         1          1          1
-abandoned:       0
pages:            6          6          6
-abandoned:       0
mmaps:            3
mmap fast:        0
mmap slow:        1
threads:          0
elapsed:       2.022s
process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb
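The statistics can also be printed programmatically. Per this diff, mi_stats_print gains a variant taking an output function and a user argument (the C API names this variant mi_stats_print_out); a sketch, assuming the mi_output_fun signature void(const char* msg, void* arg):

#include <mimalloc.h>
#include <cstdio>

// Forward each statistics line to stderr; arg is the unused user argument.
static void print_msg(const char* msg, void* arg) {
  (void)arg;
  fputs(msg, stderr);
}

int main() {
  void* p = mi_malloc(32);
  mi_free(p);
  mi_stats_print_out(print_msg, nullptr);  // print current statistics
  return 0;
}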

The above model of using the mi_ prefixed API is not always possible in existing programs that already use the standard malloc interface; another option is to override the standard malloc interface completely and redirect all calls to the mimalloc library instead.

See Overriding Malloc for more info.

diff --git a/ide/vs2017/mimalloc-override-test.vcxproj b/ide/vs2017/mimalloc-override-test.vcxproj index b8e2648b..faaa00e3 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj +++ b/ide/vs2017/mimalloc-override-test.vcxproj @@ -112,7 +112,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 511c0fab..26c8080b 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -1,4 +1,4 @@ - + @@ -129,7 +129,7 @@ Default - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) @@ -195,7 +195,7 @@ true true - $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies) + $(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies) Default @@ -231,9 +231,10 @@ + - + @@ -243,10 +244,11 @@ true + - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters index 6ac0c0b5..02652658 100644 --- a/ide/vs2017/mimalloc-override.vcxproj.filters +++ b/ide/vs2017/mimalloc-override.vcxproj.filters @@ -61,7 +61,7 @@ Source Files - + Source Files @@ -70,5 +70,11 @@ Source Files + + Source Files + + + Source Files + - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index 6147c349..9d6af0e5 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -1,4 +1,4 @@ - + @@ -111,7 +111,7 @@ - Level3 + Level4 Disabled true true @@ -165,7 +165,7 @@ - Level3 + Level4 MaxSpeed true true @@ -217,9 +217,10 @@ + - + true @@ -228,6 +229,7 @@ true + @@ -243,4 +245,4 @@ - \ No newline at end of file + diff --git a/ide/vs2017/mimalloc.vcxproj.filters b/ide/vs2017/mimalloc.vcxproj.filters index a2b64314..43660519 100644 --- a/ide/vs2017/mimalloc.vcxproj.filters +++ b/ide/vs2017/mimalloc.vcxproj.filters @@ -47,12 +47,18 @@ Source Files - + Source Files Source Files + + Source Files + + + Source Files + @@ -74,4 +80,4 @@ Header Files - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc-override-test.vcxproj b/ide/vs2019/mimalloc-override-test.vcxproj index 79adedb0..a2497a19 100644 --- a/ide/vs2019/mimalloc-override-test.vcxproj +++ b/ide/vs2019/mimalloc-override-test.vcxproj @@ -90,7 +90,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false @@ -112,7 +112,7 @@ true ..\..\include MultiThreadedDebugDLL - false + Sync Default false diff --git a/ide/vs2019/mimalloc-override.vcxproj b/ide/vs2019/mimalloc-override.vcxproj index 96a8924f..17b6f4c0 100644 --- a/ide/vs2019/mimalloc-override.vcxproj +++ b/ide/vs2019/mimalloc-override.vcxproj @@ -1,4 +1,4 @@ - + @@ -123,7 +123,7 @@ true true ../../include - MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false Default @@ -231,9 +231,13 @@ + + + true + - + @@ -243,10 +247,11 @@ true + - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc-override.vcxproj.filters b/ide/vs2019/mimalloc-override.vcxproj.filters index bc1e4c60..83d6f7fe 100644 --- a/ide/vs2019/mimalloc-override.vcxproj.filters +++ b/ide/vs2019/mimalloc-override.vcxproj.filters @@ -22,7 +22,7 @@ Source Files - + Source Files @@ -40,6 +40,15 @@ Source Files + + Source Files + + + Source Files + + + Source Files + 
@@ -69,4 +78,4 @@ {39cb7e38-69d0-43fb-8406-6a0f7cefc3b4} - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj index 28e96d71..a1372204 100644 --- a/ide/vs2019/mimalloc.vcxproj +++ b/ide/vs2019/mimalloc.vcxproj @@ -1,4 +1,4 @@ - + @@ -100,7 +100,7 @@ MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - stdcpp17 + Default @@ -111,15 +111,15 @@ - Level2 + Level4 Disabled true true ../../include - MI_DEBUG=1;%(PreprocessorDefinitions); + MI_DEBUG=3;%(PreprocessorDefinitions); CompileAsCpp false - stdcpp17 + Default @@ -217,9 +217,13 @@ + + + true + - + true @@ -228,6 +232,7 @@ true + @@ -243,4 +248,4 @@ - \ No newline at end of file + diff --git a/ide/vs2019/mimalloc.vcxproj.filters b/ide/vs2019/mimalloc.vcxproj.filters index b2282df3..4704fb2e 100644 --- a/ide/vs2019/mimalloc.vcxproj.filters +++ b/ide/vs2019/mimalloc.vcxproj.filters @@ -22,7 +22,7 @@ Source Files - + Source Files @@ -43,6 +43,15 @@ Source Files + + Source Files + + + Source Files + + + Source Files + @@ -72,4 +81,4 @@ {852a14ae-6dde-4e95-8077-ca705e97e5af} - \ No newline at end of file + diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index 10368df3..8577dbc5 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MIMALLOC_ATOMIC_H // ------------------------------------------------------ -// Atomics +// Atomics // We need to be portable between C, C++, and MSVC. // ------------------------------------------------------ @@ -23,20 +23,24 @@ terms of the MIT license. A copy of the license can be found in the file #include #endif -#define mi_atomic_cast(tp,x) (volatile _Atomic(tp)*)(x) - // ------------------------------------------------------ // Atomic operations specialized for mimalloc // ------------------------------------------------------ -// Atomically add a 64-bit value; returns the previous value. +// Atomically add a 64-bit value; returns the previous value. // Note: not using _Atomic(int64_t) as it is only used for statistics. -static inline void mi_atomic_add64(volatile int64_t* p, int64_t add); +static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add); // Atomically add a value; returns the previous value. Memory ordering is relaxed. -static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add); +static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add); -// Atomically compare and exchange a value; returns `true` if successful. +// Atomically "and" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Atomically "or" a value; returns the previous value. Memory ordering is relaxed. +static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x); + +// Atomically compare and exchange a value; returns `true` if successful. // May fail spuriously. Memory ordering as release on success, and relaxed on failure. // (Note: expected and desired are in opposite order from atomic_compare_exchange) static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected); @@ -62,57 +66,57 @@ static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x); static inline void mi_atomic_yield(void); - -// Atomically add a value; returns the previous value. 
-static inline uintptr_t mi_atomic_addu(volatile _Atomic(uintptr_t)* p, uintptr_t add) { - return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, (intptr_t)add); -} // Atomically subtract a value; returns the previous value. -static inline uintptr_t mi_atomic_subu(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { - return (uintptr_t)mi_atomic_add((volatile _Atomic(intptr_t)*)p, -((intptr_t)sub)); +static inline uintptr_t mi_atomic_sub(volatile _Atomic(uintptr_t)* p, uintptr_t sub) { + return mi_atomic_add(p, (uintptr_t)(-((intptr_t)sub))); } // Atomically increment a value; returns the incremented result. static inline uintptr_t mi_atomic_increment(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_addu(p, 1); + return mi_atomic_add(p, 1); } // Atomically decrement a value; returns the decremented result. static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) { - return mi_atomic_subu(p, 1); + return mi_atomic_sub(p, 1); } -// Atomically read a pointer; Memory order is relaxed. -static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) { - return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p); +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(volatile _Atomic(intptr_t)* p, intptr_t add) { + return (intptr_t)mi_atomic_add((volatile _Atomic(uintptr_t)*)p, (uintptr_t)add); } +// Atomically subtract a signed value; returns the previous value. +static inline intptr_t mi_atomic_subi(volatile _Atomic(intptr_t)* p, intptr_t sub) { + return (intptr_t)mi_atomic_addi(p,-sub); +} + +// Atomically read a pointer; Memory order is relaxed (i.e. no fence, only atomic). +#define mi_atomic_read_ptr_relaxed(T,p) \ + (T*)(mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)(p))) + // Atomically read a pointer; Memory order is acquire. -static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) { - return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p); -} +#define mi_atomic_read_ptr(T,p) \ + (T*)(mi_atomic_read((const volatile _Atomic(uintptr_t)*)(p))) -// Atomically write a pointer -static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) { - mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x ); -} +// Atomically write a pointer; Memory order is acquire. +#define mi_atomic_write_ptr(T,p,x) \ + mi_atomic_write((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)x)) // Atomically compare and exchange a pointer; returns `true` if successful. May fail spuriously. +// Memory order is release. (like a write) // (Note: expected and desired are in opposite order from atomic_compare_exchange) -static inline bool mi_atomic_cas_ptr_weak(volatile _Atomic(void*)* p, void* desired, void* expected) { - return mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected); -} - -// Atomically compare and exchange a pointer; returns `true` if successful. +#define mi_atomic_cas_ptr_weak(T,p,desired,expected) \ + mi_atomic_cas_weak((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected))) + +// Atomically compare and exchange a pointer; returns `true` if successful. Memory order is acquire_release. 
 // (Note: expected and desired are in opposite order from atomic_compare_exchange)
-static inline bool mi_atomic_cas_ptr_strong(volatile _Atomic(void*)* p, void* desired, void* expected) {
-  return mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)p, (uintptr_t)desired, (uintptr_t)expected);
-}
+#define mi_atomic_cas_ptr_strong(T,p,desired,expected) \
+  mi_atomic_cas_strong((volatile _Atomic(uintptr_t)*)(p),(uintptr_t)((T*)(desired)), (uintptr_t)((T*)(expected)))

 // Atomically exchange a pointer value.
-static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) {
-  return (void*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)p, (uintptr_t)exchange);
-}
+#define mi_atomic_exchange_ptr(T,p,exchange) \
+  (T*)mi_atomic_exchange((volatile _Atomic(uintptr_t)*)(p), (uintptr_t)((T*)exchange))

 #ifdef _MSC_VER
@@ -121,38 +125,48 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exchange) {
 #include <intrin.h>
 #ifdef _WIN64
 typedef LONG64   msc_intptr_t;
-#define RC64(f)  f##64
+#define MI_64(f) f##64
 #else
 typedef LONG     msc_intptr_t;
-#define RC64(f)  f
+#define MI_64(f) f
 #endif
-static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
-  return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
+static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) {
+  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
+}
+static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+}
+static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
 }
 static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
-  return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
+  return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
 }
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   return mi_atomic_cas_strong(p,desired,expected);
 }
 static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
-  return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
+  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
 }
 static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
   return *p;
 }
 static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
-  return mi_atomic_read(p);
+  return *p;
 }
 static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  #if defined(_M_IX86) || defined(_M_X64)
+  *p = x;
+  #else
   mi_atomic_exchange(p,x);
+  #endif
 }
 static inline void mi_atomic_yield(void) {
   YieldProcessor();
 }
-static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) {
+static inline void mi_atomic_addi64(volatile _Atomic(int64_t)* p, int64_t add) {
 #ifdef _WIN64
-  mi_atomic_add(p,add);
+  mi_atomic_addi(p,add);
 #else
   int64_t current;
   int64_t sum;
@@ -169,14 +183,22 @@ static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) {
 #else
 #define MI_USING_STD
 #endif
-static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) {
+static inline void mi_atomic_addi64(volatile int64_t* p, int64_t add) {
   MI_USING_STD
   atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
 }
-static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
+static inline uintptr_t mi_atomic_add(volatile _Atomic(uintptr_t)* p, uintptr_t add) {
   MI_USING_STD
   return atomic_fetch_add_explicit(p, add, memory_order_relaxed);
 }
+static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  MI_USING_STD
+  return atomic_fetch_and_explicit(p, x, memory_order_relaxed);
+}
+static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  MI_USING_STD
+  return atomic_fetch_or_explicit(p, x, memory_order_relaxed);
+}
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   MI_USING_STD
   return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed);
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 452f0b68..c7d7a1da 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -10,44 +10,53 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc-types.h"

-#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__))
+#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
 #define MI_TLS_RECURSE_GUARD
 #endif

 #if (MI_DEBUG>0)
 #define mi_trace_message(...)  _mi_trace_message(__VA_ARGS__)
 #else
-#define mi_trace_message(...)
+#define mi_trace_message(...)
 #endif

+#define MI_CACHE_LINE  64
 #if defined(_MSC_VER)
-#define mi_decl_noinline    __declspec(noinline)
-#define mi_attr_noreturn
-#elif defined(__GNUC__) || defined(__clang__)
-#define mi_decl_noinline    __attribute__((noinline))
-#define mi_attr_noreturn    __attribute__((noreturn))
+#pragma warning(disable:4127)  // suppress constant conditional warning (due to MI_SECURE paths)
+#define mi_decl_noinline    __declspec(noinline)
+#define mi_decl_thread      __declspec(thread)
+#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
+#elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
+#define mi_decl_noinline    __attribute__((noinline))
+#define mi_decl_thread      __thread
+#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
-#define mi_attr_noreturn
+#define mi_decl_thread      __thread  // hope for the best :-)
+#define mi_decl_cache_align
 #endif

 // "options.c"
-void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message);
-void _mi_fprintf(mi_output_fun* out, const char* fmt, ...);
-void _mi_error_message(const char* fmt, ...);
+void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
+void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
 void _mi_warning_message(const char* fmt, ...);
 void _mi_verbose_message(const char* fmt, ...);
 void _mi_trace_message(const char* fmt, ...);
 void _mi_options_init(void);
-void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn;
+void _mi_error_message(int err, const char* fmt, ...);

-// "init.c"
+// random.c
+void _mi_random_init(mi_random_ctx_t* ctx);
+void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
+uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
+uintptr_t _mi_heap_random_next(mi_heap_t* heap);
+static inline uintptr_t _mi_random_shuffle(uintptr_t x);
+
+// init.c
 extern mi_stats_t _mi_stats_main;
 extern const mi_page_t _mi_page_empty;
 bool _mi_is_main_thread(void);
-uintptr_t _mi_random_shuffle(uintptr_t x);
-uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
 bool _mi_preloading();  // true while the C runtime is not ready

 // os.c
@@ -59,23 +68,24 @@ size_t _mi_os_good_alloc_size(size_t size);

 // memory.c
 void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
-void  _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats);
+void  _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld);

-bool  _mi_mem_reset(void* p, size_t size, mi_stats_t* stats);
-bool  _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
-bool  _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool  _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld);
+bool  _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
+bool  _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
 bool  _mi_mem_protect(void* addr, size_t size);
 bool  _mi_mem_unprotect(void* addr, size_t size);

-void  _mi_mem_collect(mi_stats_t* stats);
+void  _mi_mem_collect(mi_os_tld_t* tld);

 // "segment.c"
-mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
+mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
-bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
+uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
-uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page
+void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
+void _mi_abandoned_await_readers(void);

 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
@@ -85,8 +95,9 @@ void _mi_page_unfull(mi_page_t* page);
 void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force);  // free the page
 void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq);           // abandon the page, to be picked up by another thread...
 void _mi_heap_delayed_free(mi_heap_t* heap);
+void _mi_heap_collect_retired(mi_heap_t* heap, bool force);

-void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay);
+void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
 void _mi_deferred_free(mi_heap_t* heap, bool force);
@@ -100,13 +111,14 @@ uint8_t _mi_bsr(uintptr_t x);  // bit-scan-right, used on BSD i
 // "heap.c"
 void _mi_heap_destroy_pages(mi_heap_t* heap);
 void _mi_heap_collect_abandon(mi_heap_t* heap);
-uintptr_t _mi_heap_random(mi_heap_t* heap);
 void _mi_heap_set_default_direct(mi_heap_t* heap);

 // "stats.c"
 void _mi_stats_done(mi_stats_t* stats);
-double _mi_clock_end(double start);
-double _mi_clock_start(void);
+
+mi_msecs_t _mi_clock_now(void);
+mi_msecs_t _mi_clock_end(mi_msecs_t start);
+mi_msecs_t _mi_clock_start(void);

 // "alloc.c"
 void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept;  // called from `_mi_malloc_generic`
@@ -138,13 +150,36 @@ bool _mi_page_is_valid(mi_page_t* page);
 #endif

+/* -----------------------------------------------------------
+  Error codes passed to `_mi_fatal_error`
+  All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
+  For portability define undefined error codes using common Unix codes:
+  <https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
+----------------------------------------------------------- */
+#include <errno.h>
+#ifndef EAGAIN      // double free
+#define EAGAIN (11)
+#endif
+#ifndef ENOMEM      // out of memory
+#define ENOMEM (12)
+#endif
+#ifndef EFAULT      // corrupted free-list or meta-data
+#define EFAULT (14)
+#endif
+#ifndef EINVAL      // trying to free an invalid pointer
+#define EINVAL (22)
+#endif
+#ifndef EOVERFLOW   // count*size overflow
+#define EOVERFLOW (75)
+#endif
+
 /* -----------------------------------------------------------
   Inlined definitions
 ----------------------------------------------------------- */
 #define UNUSED(x)     (void)(x)
-#if (MI_DEBUG>0)
-#define UNUSED_RELEASE(x)
+#if (MI_DEBUG>0)
+#define UNUSED_RELEASE(x)
 #else
 #define UNUSED_RELEASE(x)  UNUSED(x)
 #endif
@@ -158,25 +193,6 @@ bool _mi_page_is_valid(mi_page_t* page);
 #define MI_INIT256(x)  MI_INIT128(x),MI_INIT128(x)

-// Overflow detecting multiply
-#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)
-static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
-#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
-#include <limits.h>  // UINT_MAX, ULONG_MAX
-#if (SIZE_MAX == UINT_MAX)
-  return __builtin_umul_overflow(count, size, total);
-#elif (SIZE_MAX == ULONG_MAX)
-  return __builtin_umull_overflow(count, size, total);
-#else
-  return __builtin_umulll_overflow(count, size, total);
-#endif
-#else /* __builtin_umul_overflow is unavailable */
-  *total = count * size;
-  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
-          && size > 0 && (SIZE_MAX / size) < count);
-#endif
-}
-
 // Is `x` a power of two? (0 is considered a power of two)
 static inline bool _mi_is_power_of_two(uintptr_t x) {
   return ((x & (x - 1)) == 0);
@@ -184,6 +200,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) {

 // Align upwards
 static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
+  mi_assert_internal(alignment != 0);
   uintptr_t mask = alignment - 1;
   if ((alignment & mask) == 0) {  // power of two?
     return ((sz + mask) & ~mask);
@@ -193,6 +210,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
   }
 }

+// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
+static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
+  mi_assert_internal(divider != 0);
+  return (divider == 0 ? size : ((size + divider - 1) / divider));
+}
+
 // Is memory zero initialized?
 static inline bool mi_mem_is_zero(void* p, size_t size) {
   for (size_t i = 0; i < size; i++) {
@@ -209,6 +232,40 @@ static inline size_t _mi_wsize_from_size(size_t size) {
 }

+// Overflow detecting multiply
+static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
+#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
+#include <limits.h>  // UINT_MAX, ULONG_MAX
+#if (SIZE_MAX == UINT_MAX)
+  return __builtin_umul_overflow(count, size, total);
+#elif (SIZE_MAX == ULONG_MAX)
+  return __builtin_umull_overflow(count, size, total);
+#else
+  return __builtin_umulll_overflow(count, size, total);
+#endif
+#else /* __builtin_umul_overflow is unavailable */
+  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)
+  *total = count * size;
+  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
+          && size > 0 && (SIZE_MAX / size) < count);
+#endif
+}
+
+// Safe multiply `count*size` into `total`; return `true` on overflow.
+static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
+  if (count==1) {  // quick check for the case where count is one (common for C++ allocators)
+    *total = size;
+    return false;
+  }
+  else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
+    _mi_error_message(EOVERFLOW, "allocation request too large (%zu * %zu bytes)\n", count, size);
+    *total = SIZE_MAX;
+    return true;
+  }
+  else return false;
+}
+
+
 /* -----------------------------------------------------------
   The thread local default heap
 ----------------------------------------------------------- */
@@ -221,7 +278,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate

 static inline mi_heap_t* mi_get_default_heap(void) {
 #ifdef MI_TLS_RECURSE_GUARD
-  // on some platforms, like macOS, the dynamic loader calls `malloc`
+  // on some BSD platforms, like macOS, the dynamic loader calls `malloc`
   // to initialize thread local data. To avoid recursion, we need to avoid
   // accessing the thread local `_mi_default_heap` until our module is loaded
   // and use the statically allocated main heap until that time.
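For reference, the overflow-detecting multiply added above is easy to exercise in isolation. The following stand-alone sketch (not part of the diff; the names `MUL_NO_OVERFLOW` and `mul_overflow` are hypothetical copies) shows the portable fallback path, assuming `__builtin_umul_overflow` is unavailable: if both operands are below sqrt(SIZE_MAX) the product cannot wrap, otherwise a division verifies it.

```
#include <cstddef>   // size_t
#include <cstdint>   // SIZE_MAX
#include <cstdio>

// sqrt(SIZE_MAX): if both operands are below this bound the product fits.
#define MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))

static bool mul_overflow(size_t count, size_t size, size_t* total) {
  *total = count * size;  // may wrap; detected by the division check below
  return ((size >= MUL_NO_OVERFLOW || count >= MUL_NO_OVERFLOW)
          && size > 0 && (SIZE_MAX / size) < count);
}

int main() {
  size_t total;
  std::printf("%d\n", mul_overflow(SIZE_MAX / 2, 4, &total));  // 1: overflows
  std::printf("%d\n", mul_overflow(1000, 8, &total));          // 0: total == 8000
  return 0;
}
```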
@@ -279,7 +336,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE); + mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); uintptr_t idx = (uintptr_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); @@ -294,7 +351,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - return _mi_segment_page_start(segment, page, page->block_size, page_size); + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); + return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } // Get the page containing the pointer @@ -302,7 +361,40 @@ static inline mi_page_t* _mi_ptr_page(void* p) { return _mi_segment_page_of(_mi_ptr_segment(p), p); } +// Get the block size of a page (special cased for huge objects) +static inline size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); + return psize; + } +} + // Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_write(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { return (mi_block_t*)(tf & ~0x03); } @@ -319,36 +411,30 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* return mi_tf_make(block, mi_tf_delayed(tf)); } -// are all blocks in a page freed? +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); - return (page->used - page->thread_freed == 0); + return (page->used == 0); } -// are there immediately available blocks +// are there any available blocks? +static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// are there immediately available blocks, i.e. blocks available on the free list. 
static inline bool mi_page_immediate_available(const mi_page_t* page) { mi_assert_internal(page != NULL); return (page->free != NULL); } -// are there free blocks in this page? -static inline bool mi_page_has_free(mi_page_t* page) { - mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); - mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); - return hasfree; -} - -// are all blocks in use? -static inline bool mi_page_all_used(mi_page_t* page) { - mi_assert_internal(page != NULL); - return !mi_page_has_free(page); -} // is more than 7/8th of a page in use? static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed <= frac); + return (page->reserved - page->used <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { @@ -377,12 +463,30 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) { } -// ------------------------------------------------------------------- -// Encoding/Decoding the free list next pointers -// Note: we pass a `null` value to be used as the `NULL` value for the -// end of a free list. This is to prevent the cookie itself to ever -// be present among user blocks (as `cookie^0==cookie`). -// ------------------------------------------------------------------- +/* ------------------------------------------------------------------- +Encoding/Decoding the free list next pointers + +This is to protect against buffer overflow exploits where the +free list is mutated. Many hardened allocators xor the next pointer `p` +with a secret key `k1`, as `p^k1`. This prevents overwriting with known +values but might be still too weak: if the attacker can guess +the pointer `p` this can reveal `k1` (since `p^k1^p == k1`). +Moreover, if multiple blocks can be read as well, the attacker can +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot +about the pointers (and subsequently `k1`). 
+
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
+Since these operations are not associative, xor-ing two encoded values no
+longer cancels the keys (unlike the single-xor scheme above).
+------------------------------------------------------------------- */
+
+static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return ((x << shift) | (x >> (MI_INTPTR_BITS - shift)));
+}
+static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
+}
+
+static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
+  mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2);
   if (mi_unlikely((void*)b==null)) { b = NULL; }
   return b;
 #else
-  UNUSED(cookie); UNUSED(null);
+  UNUSED(key1); UNUSED(key2); UNUSED(null);
   return (mi_block_t*)block->next;
 #endif
 }

-static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
+static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) {
 #ifdef MI_ENCODE_FREELIST
   if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
-  block->next = (mi_encoded_t)next ^ cookie;
+  block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1;
 #else
-  UNUSED(cookie); UNUSED(null);
+  UNUSED(key1); UNUSED(key2); UNUSED(null);
   block->next = (mi_encoded_t)next;
 #endif
 }

 static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* next = mi_block_nextx(page,block,page->cookie);
-  // check for free list corruption: is `next` at least in our segment range?
+  mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]);
+  // check for free list corruption: is `next` at least in the same page?
   // TODO: check if `next` is `page->block_size` aligned?
-  if (next!=NULL && !mi_is_in_same_page(block, next)) {
-    _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
+  if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
+    _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
     next = NULL;
-  }
+  }
   return next;
 #else
   UNUSED(page);
-  return mi_block_nextx(page,block,0);
+  return mi_block_nextx(page,block,0,0);
 #endif
 }

 static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_set_nextx(page,block,next, page->cookie);
+  mi_block_set_nextx(page,block,next, page->key[0], page->key[1]);
 #else
   UNUSED(page);
-  mi_block_set_nextx(page,block, next,0);
+  mi_block_set_nextx(page,block, next,0,0);
 #endif
 }

+// -------------------------------------------------------------------
+// Fast "random" shuffle
+// -------------------------------------------------------------------
+
+static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
+  if (x==0) { x = 17; }  // ensure we don't get stuck in generating zeros
+#if (MI_INTPTR_SIZE==8)
+  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
+  x ^= x >> 30;
+  x *= 0xbf58476d1ce4e5b9UL;
+  x ^= x >> 27;
+  x *= 0x94d049bb133111ebUL;
+  x ^= x >> 31;
+#elif (MI_INTPTR_SIZE==4)
+  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
+  x ^= x >> 16;
+  x *= 0x7feb352dUL;
+  x ^= x >> 15;
+  x *= 0x846ca68bUL;
+  x ^= x >> 16;
+#endif
+  return x;
+}
+
+// -------------------------------------------------------------------
+// Optimize numa node access for the common case (= one node)
+// -------------------------------------------------------------------
+
+int    _mi_os_numa_node_get(mi_os_tld_t* tld);
+size_t _mi_os_numa_node_count_get(void);
+
+extern size_t _mi_numa_node_count;
+static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
+  if (mi_likely(_mi_numa_node_count == 1)) return 0;
+  else return _mi_os_numa_node_get(tld);
+}
+static inline size_t _mi_os_numa_node_count(void) {
+  if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
+  else return _mi_os_numa_node_count_get();
+}
+
+
 // -------------------------------------------------------------------
 // Getting the thread id should be performant
 // as it is called in the fast path of `_mi_free`,
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 96e1860f..48d86a25 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -46,7 +46,7 @@ terms of the MIT license. A copy of the license can be found in the file

 // Encoded free lists allow detection of corrupted free lists
 // and can detect buffer overflows and double `free`s.
-#if (MI_SECURE>=3 || MI_DEBUG>=1)
+#if (MI_SECURE>=3 || MI_DEBUG>=1)
 #define MI_ENCODE_FREELIST  1
 #endif
@@ -76,6 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif

 #define MI_INTPTR_SIZE  (1<<MI_INTPTR_SHIFT)
+#define MI_INTPTR_BITS  (MI_INTPTR_SIZE*8)

+// Notes:
+// - To limit the structure size, the `xblock_size` is 32-bits only; for
+//   blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
+// - `thread_free` uses the bottom bits as delayed-free flags to optimize
+//   concurrent frees where only the first concurrent free adds to the owning
+//   heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
+//   The invariant is that no-delayed-free is only set if there is
+//   at least one block that will be added, or has already been added, to
+//   the owning heap `thread_delayed_free` list. This guarantees that pages
+//   will be freed correctly even if only other threads free blocks.
 typedef struct mi_page_s {
   // "owned" by the segment
   uint8_t  segment_idx;     // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
@@ -181,34 +199,27 @@ typedef struct mi_page_s {
   uint8_t  is_reset:1;      // `true` if the page memory was reset
   uint8_t  is_committed:1;  // `true` if the page virtual memory is committed
   uint8_t  is_zero_init:1;  // `true` if the page was zero initialized
-
+
   // layout like this to optimize access in `mi_malloc` and `mi_free`
   uint16_t capacity;        // number of blocks committed, must be the first field, see `segment.c:page_clear`
   uint16_t reserved;        // number of blocks reserved in memory
   mi_page_flags_t flags;    // `in_full` and `has_aligned` flags (8 bits)
-  bool     is_zero;         // `true` if the blocks in the free list are zero initialized
+  uint8_t  is_zero:1;       // `true` if the blocks in the free list are zero initialized
+  uint8_t  retire_expire:7; // expiration count for retired blocks

   mi_block_t* free;         // list of available free blocks (`malloc` allocates from this list)
   #ifdef MI_ENCODE_FREELIST
-  uintptr_t cookie;         // random cookie to encode the free lists
+  uintptr_t key[2];         // two random keys to encode the free lists (see `_mi_block_next`)
   #endif
-  size_t   used;            // number of blocks in use (including blocks in `local_free` and `thread_free`)
-
-  mi_block_t* local_free;   // list of deferred free blocks by this thread (migrates to `free`)
-  volatile _Atomic(uintptr_t)        thread_freed;  // at least this number of blocks are in `thread_free`
-  volatile _Atomic(mi_thread_free_t) thread_free;   // list of deferred free blocks freed by other threads
+  uint32_t used;            // number of blocks in use (including blocks in `local_free` and `thread_free`)
+  uint32_t xblock_size;     // size available in each block (always `>0`)

-  // less accessed info
-  size_t   block_size;      // size available in each block (always `>0`)
-  mi_heap_t* heap;          // the owning heap
+  mi_block_t* local_free;   // list of deferred free blocks by this thread (migrates to `free`)
+  volatile _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
+  volatile _Atomic(uintptr_t)        xheap;
+
   struct mi_page_s* next;   // next page owned by this thread with the same `block_size`
   struct mi_page_s* prev;   // previous page owned by this thread with the same `block_size`
-
-  // improve page index calculation
-  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
-  #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST))
-  void* padding[1];         // 12 words on 64-bit with cookie, 12 words on 32-bit plain
-  #endif
 } mi_page_t;
@@ -226,19 +237,21 @@ typedef enum mi_page_kind_e {
 typedef struct mi_segment_s {
   // memory fields
   size_t   memid;            // id for the os-level memory manager
-  bool     mem_is_fixed;     // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
+  bool     mem_is_fixed;     // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
   bool     mem_is_committed; // `true` if the whole segment is eagerly committed

   // segment fields
-  struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc`
+  struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc`
   struct mi_segment_s* prev;
-  volatile _Atomic(struct mi_segment_s*) abandoned_next;
-  size_t   abandoned;        // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
+  struct mi_segment_s* abandoned_next;
+  size_t   abandoned;        // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
+  size_t   abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
+
   size_t   used;             // count of pages in use (`used <= capacity`)
   size_t   capacity;         // count of available pages (`#free + used`)
   size_t   segment_size;     // for huge pages this may be different from `MI_SEGMENT_SIZE`
   size_t   segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages.
-  uintptr_t cookie;          // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
+  uintptr_t cookie;          // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`

   // layout like this to optimize access in `mi_free`
   size_t   page_shift;       // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
@@ -273,6 +286,14 @@ typedef struct mi_page_queue_s {
 #define MI_BIN_FULL  (MI_BIN_HUGE+1)

+// Random context
+typedef struct mi_random_cxt_s {
+  uint32_t input[16];
+  uint32_t output[16];
+  int      output_available;
+} mi_random_ctx_t;
+
+
 // A heap owns a set of pages.
 struct mi_heap_s {
   mi_tld_t*       tld;
@@ -280,8 +301,9 @@ struct mi_heap_s {
   mi_page_queue_t pages[MI_BIN_FULL + 1];  // queue of pages for each size class (or "bin")
   volatile _Atomic(mi_block_t*) thread_delayed_free;
   uintptr_t       thread_id;   // thread this heap belongs to
-  uintptr_t       cookie;
-  uintptr_t       random;      // random number used for secure allocation
+  uintptr_t       cookie;      // random cookie to verify pointers (see `_mi_ptr_cookie`)
+  uintptr_t       key[2];      // two random keys used to encode the `thread_delayed_free` list
+  mi_random_ctx_t random;      // random number context used for secure allocation
   size_t          page_count;  // total number of pages in the `pages` queues.
   bool            no_reclaim;  // `true` if this heap should not reclaim abandoned pages
 };
@@ -384,22 +406,29 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 #define mi_heap_stat_increase(heap,stat,amount)  mi_stat_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_decrease(heap,stat,amount)  mi_stat_decrease( (heap)->tld->stats.stat, amount)

-
 // ------------------------------------------------------
 // Thread Local data
 // ------------------------------------------------------

+typedef int64_t mi_msecs_t;
+
 // Queue of segments
 typedef struct mi_segment_queue_s {
   mi_segment_t* first;
   mi_segment_t* last;
 } mi_segment_queue_t;

+// OS thread local data
+typedef struct mi_os_tld_s {
+  size_t      region_idx;  // start point for next allocation
+  mi_stats_t* stats;       // points to tld stats
+} mi_os_tld_t;

 // Segments thread local data
 typedef struct mi_segments_tld_s {
   mi_segment_queue_t small_free;   // queue of segments with free small pages
   mi_segment_queue_t medium_free;  // queue of segments with free medium pages
+  mi_page_queue_t    pages_reset;  // queue of freed pages that can be reset
   size_t             count;        // current number of segments;
   size_t             peak_count;   // peak number of segments
   size_t             current_size; // current size of all segments
@@ -408,14 +437,9 @@ typedef struct mi_segments_tld_s {
   size_t             cache_size;   // total size of all segments in the cache
   mi_segment_t*      cache;        // (small) cache of segments
   mi_stats_t*        stats;        // points to tld stats
+  mi_os_tld_t*       os;           // points to os stats
 } mi_segments_tld_t;

-// OS thread local data
-typedef struct mi_os_tld_s {
-  size_t      region_idx;  // start point for next allocation
-  mi_stats_t* stats;       // points to tld stats
-} mi_os_tld_t;
-
 // Thread local data
 struct mi_tld_s {
   unsigned long long heartbeat;  // monotonic heartbeat count
diff --git a/include/mimalloc.h b/include/mimalloc.h
index 7f26896c..94fcd788 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -8,17 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_H
 #define MIMALLOC_H

-#define MI_MALLOC_VERSION 120   // major + 2 digits minor
+#define MI_MALLOC_VERSION 150   // major + 2 digits minor

 // ------------------------------------------------------
 // Compiler specific attributes
 // ------------------------------------------------------

 #ifdef __cplusplus
-  #if (__GNUC__ <= 5) || (_MSC_VER <= 1900)
-    #define mi_attr_noexcept   throw()
-  #else
+  #if (__cplusplus >= 201103L) || (_MSC_VER > 1900)  // C++11
     #define mi_attr_noexcept   noexcept
+  #else
+    #define mi_attr_noexcept   throw()
   #endif
 #else
   #define mi_attr_noexcept
@@ -37,32 +37,37 @@ terms of the MIT license. A copy of the license can be found in the file
 #else
   #define mi_decl_allocator __declspec(restrict)
 #endif
-  #define mi_decl_thread    __declspec(thread)
+  #define mi_cdecl          __cdecl
   #define mi_attr_malloc
   #define mi_attr_alloc_size(s)
   #define mi_attr_alloc_size2(s1,s2)
-  #define mi_cdecl          __cdecl
-#elif defined(__GNUC__) || defined(__clang__)
-  #define mi_decl_thread    __thread
+  #define mi_attr_alloc_align(p)
+#elif defined(__GNUC__)     // includes clang and icc
+  #define mi_cdecl          // leads to warnings... __attribute__((cdecl))
   #define mi_decl_export    __attribute__((visibility("default")))
   #define mi_decl_allocator
   #define mi_attr_malloc    __attribute__((malloc))
-  #if defined(__clang_major__) && (__clang_major__ < 4)
+  #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5)
   #define mi_attr_alloc_size(s)
   #define mi_attr_alloc_size2(s1,s2)
+  #define mi_attr_alloc_align(p)
+  #elif defined(__INTEL_COMPILER)
+  #define mi_attr_alloc_size(s)       __attribute__((alloc_size(s)))
+  #define mi_attr_alloc_size2(s1,s2)  __attribute__((alloc_size(s1,s2)))
+  #define mi_attr_alloc_align(p)
   #else
   #define mi_attr_alloc_size(s)       __attribute__((alloc_size(s)))
   #define mi_attr_alloc_size2(s1,s2)  __attribute__((alloc_size(s1,s2)))
+  #define mi_attr_alloc_align(p)      __attribute__((alloc_align(p)))
   #endif
-  #define mi_cdecl          // leads to warnings... __attribute__((cdecl))
 #else
-  #define mi_decl_thread    __thread
+  #define mi_cdecl
   #define mi_decl_export
   #define mi_decl_allocator
   #define mi_attr_malloc
   #define mi_attr_alloc_size(s)
   #define mi_attr_alloc_size2(s1,s2)
-  #define mi_cdecl
+  #define mi_attr_alloc_align(p)
 #endif

 // ------------------------------------------------------
@@ -104,26 +109,34 @@ mi_decl_export mi_decl_allocator void* mi_mallocn(size_t count, size_t size)
 mi_decl_export mi_decl_allocator void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3);
 mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
-
 mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept;
 mi_decl_export size_t mi_good_size(size_t size)     mi_attr_noexcept;

-typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat);
-mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept;

-typedef void (mi_output_fun)(const char* msg);
-mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept;
+// ------------------------------------------------------
+// Internals
+// ------------------------------------------------------
+
+typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
+mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept;
+
+typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg);
+mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept;
+
+typedef void (mi_cdecl mi_error_fun)(int err, void* arg);
+mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg);

 mi_decl_export void mi_collect(bool force) mi_attr_noexcept;
 mi_decl_export int  mi_version(void)       mi_attr_noexcept;
 mi_decl_export void mi_stats_reset(void)   mi_attr_noexcept;
 mi_decl_export void mi_stats_merge(void)   mi_attr_noexcept;
-mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept;
+mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept;  // backward compatibility: `out` is ignored and should be NULL
+mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;

 mi_decl_export void mi_process_init(void)  mi_attr_noexcept;
 mi_decl_export void mi_thread_init(void)   mi_attr_noexcept;
 mi_decl_export void mi_thread_done(void)   mi_attr_noexcept;
-mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept;
+mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;


 // -------------------------------------------------------------------------------------
@@ -132,19 +145,19 @@ mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept;
 // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
 // -------------------------------------------------------------------------------------

-mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_export mi_decl_allocator void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
 mi_decl_export mi_decl_allocator void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
-mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_export mi_decl_allocator void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
 mi_decl_export mi_decl_allocator void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
-mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
+mi_decl_export mi_decl_allocator void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3);
 mi_decl_export mi_decl_allocator void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
-mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
+mi_decl_export mi_decl_allocator void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
 mi_decl_export mi_decl_allocator void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);


-// ------------------------------------------------------
-// Heaps
-// ------------------------------------------------------
+// -------------------------------------------------------------------------------------
+// Heaps: first-class, but can only allocate from the same thread that created it.
+// ------------------------------------------------------------------------------------- struct mi_heap_s; typedef struct mi_heap_s mi_heap_t; @@ -170,13 +183,13 @@ mi_decl_export char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noex mi_decl_export char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept; mi_decl_export char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept; -mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); -mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); @@ -190,17 +203,17 @@ mi_decl_export mi_decl_allocator void* mi_heap_realloc_aligned_at(mi_heap_t* hea mi_decl_export mi_decl_allocator void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); mi_decl_export mi_decl_allocator void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); -mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); mi_decl_export mi_decl_allocator void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) 
mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); -mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); +mi_decl_export mi_decl_allocator void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); mi_decl_export mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); -mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); +mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3) mi_attr_alloc_align(4); mi_decl_export mi_decl_allocator void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(3); -mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); +mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5); mi_decl_export mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(3,4); @@ -228,9 +241,14 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b // Experimental mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; -mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept; +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept; + +// deprecated +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; + // ------------------------------------------------------ // Convenience // ------------------------------------------------------ @@ -262,17 +280,20 @@ typedef enum mi_option_e { // the following options are experimental mi_option_eager_commit, mi_option_eager_region_commit, + mi_option_reset_decommits, mi_option_large_os_pages, // implies eager commit mi_option_reserve_huge_os_pages, mi_option_segment_cache, mi_option_page_reset, - mi_option_cache_reset, - mi_option_reset_decommits, - mi_option_eager_commit_delay, + mi_option_abandoned_page_reset, mi_option_segment_reset, + mi_option_eager_commit_delay, + mi_option_reset_delay, + 
mi_option_use_numa_nodes, mi_option_os_tag, mi_option_max_errors, - _mi_option_last + _mi_option_last, + mi_option_eager_page_commit = mi_option_eager_commit } mi_option_t; @@ -298,11 +319,11 @@ mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; -mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_export void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); mi_decl_export void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2,3); mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; @@ -317,14 +338,74 @@ mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept; mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept; -mi_decl_export void* mi_new(size_t n) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_nothrow(size_t n) mi_attr_malloc mi_attr_alloc_size(1); -mi_decl_export void* mi_new_aligned_nothrow(size_t n, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1); +// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`. +// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception). +mi_decl_export void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_export void* mi_new_nothrow(size_t size) mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_export void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_export void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2); +mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); #ifdef __cplusplus } #endif +// --------------------------------------------------------------------------------------------- +// Implement the C++ std::allocator interface for use in STL containers. 
+// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally)
+// ---------------------------------------------------------------------------------------------
+#ifdef __cplusplus
+
+#include <limits>       // std::numeric_limits
+#if (__cplusplus >= 201103L) || (_MSC_VER > 1900)  // C++11
+#include <type_traits>  // std::true_type
+#include <utility>      // std::forward
+#endif
+
+template<class T> struct mi_stl_allocator {
+  typedef T                 value_type;
+  typedef std::size_t       size_type;
+  typedef std::ptrdiff_t    difference_type;
+  typedef value_type&       reference;
+  typedef value_type const& const_reference;
+  typedef value_type*       pointer;
+  typedef value_type const* const_pointer;
+  template <class U> struct rebind { typedef mi_stl_allocator<U> other; };
+
+  mi_stl_allocator()                                             mi_attr_noexcept { }
+  mi_stl_allocator(const mi_stl_allocator&)                      mi_attr_noexcept { }
+  template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { }
+  mi_stl_allocator  select_on_container_copy_construction() const { return *this; }
+  void              deallocate(T* p, size_type) { mi_free(p); }
+
+  #if (__cplusplus >= 201703L)  // C++17
+  T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); }
+  T* allocate(size_type count, const void*) { return allocate(count); }
+  #else
+  pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); }
+  #endif
+
+  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
+  using propagate_on_container_copy_assignment = std::true_type;
+  using propagate_on_container_move_assignment = std::true_type;
+  using propagate_on_container_swap            = std::true_type;
+  using is_always_equal                        = std::true_type;
+  template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
+  template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
+  #else
+  void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
+  void destroy(pointer p) { p->~value_type(); }
+  #endif
+
+  size_type     max_size() const mi_attr_noexcept { return (std::numeric_limits<difference_type>::max() / sizeof(value_type)); }
+  pointer       address(reference x) const        { return &x; }
+  const_pointer address(const_reference x) const  { return &x; }
+};
+
+template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; }
+template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }
+#endif // __cplusplus

 #endif
diff --git a/readme.md b/readme.md
index 9d3974c9..baac2a93 100644
--- a/readme.md
+++ b/readme.md
@@ -10,15 +10,15 @@
 mimalloc (pronounced "me-malloc")
 is a general purpose allocator with excellent [performance](#performance)
 characteristics.
 Initially developed by Daan Leijen for the run-time systems of the
-[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages.
+[Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages.
+Latest release: `v1.4.0` (2020-01-22).

 It is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
 ```
 > LD_PRELOAD=/usr/bin/libmimalloc.so myprogram
 ```
-
-Notable aspects of the design include:
+It also has an easy way to override the allocator in [Windows](#override_on_windows). Notable aspects of the design include:

 - __small and consistent__: the library is about 6k LOC using simple and
   consistent data structures. This makes it very suitable
@@ -45,9 +45,10 @@ Notable aspects of the design include:
   times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes),
   and has no internal points of contention using only atomic operations.
 - __fast__: In our benchmarks (see [below](#performance)),
-  _mimalloc_ always outperforms all other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
+  _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
  and usually uses less memory (up to 25% more in the worst case). A nice property
-  is that it does consistently well over a wide range of benchmarks.
+  is that it does consistently well over a wide range of benchmarks. There is also good huge OS page
+  support for larger server programs.

 The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API.
 You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results.
@@ -56,6 +57,11 @@ Enjoy!

 ### Releases

+* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset,
+more eager concurrent free, addition of STL allocator, fixed potential memory leak.
+* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
+free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
+* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
 * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
 * 2019-10-07, `v1.1.0`: stable release 1.1.
 * 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
@@ -127,7 +133,7 @@
 mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist
 with other allocators linked to the same program.
 If you use `cmake`, you can simply use:
 ```
-find_package(mimalloc 1.0 REQUIRED)
+find_package(mimalloc 1.4 REQUIRED)
 ```
 in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either:
 ```
@@ -141,7 +147,9 @@
 to link with the static library. See `test\CMakeLists.txt` for an example.

 For best performance in C++ programs, it is also recommended to override the
 global `new` and `delete` operators. For convenience, mimalloc provides
-[mimalloc-new-delete.h](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
+[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
+In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator`
+interface.

 You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`)
 and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version):
@@ -182,7 +190,7 @@ malloc requested:   32.8 mb

 The above model of using the `mi_` prefixed API is not always possible
 though in existing programs that already use the standard malloc interface,
 and another option is to override the standard malloc interface
-completely and redirect all calls to the _mimalloc_ library instead.
+completely and redirect all calls to the _mimalloc_ library instead.

 ## Environment Options
@@ -192,23 +200,35 @@ or via environment variables.

 - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
 - `MIMALLOC_VERBOSE=1`: show verbose messages.
 - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
+- `MIMALLOC_PAGE_RESET=1`: reset (or purge) OS pages when not in use. This can reduce
+  memory fragmentation in long running (server) programs. If performance is impacted,
+  `MIMALLOC_RESET_DELAY=<msecs>` can be set higher (100ms by default) to make the page
+  reset occur less frequently.
 - `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages when available; for some workloads this can significantly
   improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
   to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
   the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
-  can have fragmented memory.
-- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
+  can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead when possible).
+
 - `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB huge OS pages. This reserves the huge pages at
-  startup and can give quite a performance improvement on long running workloads. Usually it is better to not use
+  startup and can give quite a (latency) performance improvement on long running workloads. Usually it is better to not use
   `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
-  contiguous physical memory can take a long time when memory is fragmented. Still experimental.
+  contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
+  startup only once).
+  Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
+  With huge OS pages, it may be beneficial to set the setting
+  `MIMALLOC_EAGER_COMMIT_DELAY=N` (with usually `N` as 1) so that the initial `N` segments
+  of a thread are not allocated in the huge OS pages; this prevents short-lived threads
+  that allocate only a little from taking up space in the huge OS page area (which cannot be reset).

 [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
+
 # Overriding Malloc

 Overriding the standard `malloc` can be done either _dynamically_ or _statically_.
@@ -217,7 +237,7 @@ Overriding the standard `malloc` can be done either _dynamically_ or _statically

 This is the recommended way to override the standard malloc interface.
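(Aside, on the environment options above: the same settings can also be applied from code through the public options API in `mimalloc.h` before the allocator first reads them. A minimal sketch, assuming the `mi_option_t` values introduced in this diff; the chosen numbers are illustrative only:)

```
#include <mimalloc.h>

int main() {
  // Equivalent of MIMALLOC_PAGE_RESET=1 and MIMALLOC_RESET_DELAY=250,
  // set programmatically before any allocation takes place.
  mi_option_set(mi_option_page_reset, 1);
  mi_option_set(mi_option_reset_delay, 250);

  void* p = mi_malloc(64);
  mi_free(p);
  return (p != nullptr) ? 0 : 1;
}
```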
-### Linux, BSD +### Override on Linux, BSD On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -236,7 +256,7 @@ or run with the debug version to get detailed statistics: > env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` -### MacOS +### Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -248,13 +268,13 @@ resolved to the _mimalloc_ library. Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is actively worked on to fix this -(see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). +Note: unfortunately, at this time, dynamic overriding on macOS seems broken but it is +actively worked on to fix this (see issue [`#50`](https://github.com/microsoft/mimalloc/issues/50)). -### Windows +### Override on Windows -On Windows you need to link your program explicitly with the mimalloc -DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +Overriding on Windows is robust but requires that you link your program explicitly with +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). Moreover, you need to ensure the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) is available in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to @@ -264,8 +284,8 @@ To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the `main` function, like `mi_version()` (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project for an example on how to use this. For best performance on Windows with C++, it -is highly recommended to also override the `new`/`delete` operations (as described -in the introduction). +is also recommended to also override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. @@ -297,68 +317,71 @@ under your control or otherwise mixing of pointers from different heaps may occu # Performance +Last update: 2020-01-20 + We tested _mimalloc_ against many other top allocators over a wide range of benchmarks, ranging from various real world programs to synthetic benchmarks that see how the allocator behaves under more -extreme circumstances. +extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading +allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. A nice property is that it +does consistently well over the wide range of benchmarks. -In our benchmarks, _mimalloc_ always outperforms all other leading -allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and usually uses less -memory (up to 25% more in the worst case). A nice property is that it -does *consistently* well over the wide range of benchmarks. 
-
-Allocators are interesting as there exists no algorithm that is generally
+General memory allocators are interesting as there exists no algorithm that is
 optimal -- for a given allocator one can usually construct a workload
 where it does not do so well. The goal is thus to find an allocation
 strategy that performs well over a wide range of benchmarks without
-suffering from underperformance in less common situations (which is what
-the second half of our benchmark set tests for).
+suffering from (too much) underperformance in less common situations.

-We show here only the results on an AMD EPYC system (Apr 2019) -- for
-specific details and further benchmarks we refer to the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action).
+As always, interpret these results with care since some benchmarks test synthetic
+or uncommon situations that may never apply to your workloads. For example, most
+allocators do not do well on `xmalloc-testN` but that includes the best
+industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of
+the world's largest systems (like Chrome or FreeBSD).

-The benchmark suite is scripted and available separately
+We show here only an overview -- for
+more specific details and further benchmarks we refer to the
+[technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action).
+The benchmark suite is automated and available separately
 as [mimalloc-bench](https://github.com/daanx/mimalloc-bench).

-## Benchmark Results
+## Benchmark Results on 36-core Intel

-Testing on a big Amazon EC2 instance ([r5a.4xlarge](https://aws.amazon.com/ec2/instance-types/))
-consisting of a 16-core AMD EPYC 7000 at 2.5GHz
-with 128GB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.3.0.
-The measured allocators are _mimalloc_ (mi),
-Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc) used in Chrome,
-[_jemalloc_](https://github.com/jemalloc/jemalloc) (je) by Jason Evans used in Firefox and FreeBSD,
-[_snmalloc_](https://github.com/microsoft/snmalloc) (sn) by Liétar et al. \[8], [_rpmalloc_](https://github.com/rampantpixels/rpmalloc) (rp) by Mattias Jansson at Rampant Pixels,
-[_Hoard_](https://github.com/emeryberger/Hoard) by Emery Berger \[1],
-the system allocator (glibc) (based on _PtMalloc2_), and the Intel thread
-building blocks [allocator](https://github.com/intel/tbb) (tbb).
+Testing on a big Amazon EC2 compute instance
+([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized))
+consisting of a 72 processor Intel Xeon at 3GHz
+with 144GiB ECC memory, running Ubuntu 18.04.1 with LibC 2.27 and GCC 7.4.0.
+The measured allocators are _mimalloc_ (xmi, tag:v1.4.0, page reset enabled)
+and its secure build as _smi_,
+Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome,
+Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD,
+the Intel thread building blocks [allocator](https://github.com/intel/tbb) (tbb, tag:2020),
+[rpmalloc](https://github.com/mjansson/rpmalloc) (rp, tag:1.4.0) by Mattias Jansson,
+the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (tag:3.13) allocator by Emery Berger \[1],
+the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e7) allocator by
+Bobby Powers _et al_ \[8],
+and finally the default system allocator (glibc, 2.7.0) (based on _PtMalloc2_).

-![bench-r5a-1](doc/bench-r5a-1.svg)
-![bench-r5a-2](doc/bench-r5a-2.svg)
+
+

-Memory usage:
+Any benchmarks ending in `N` run on all processors in parallel.
+Results are averaged over 10 runs and reported relative
+to mimalloc (where 1.2 means it took 1.2× longer to run).
+The legend also contains the _overall relative score_ between the
+allocators where 100 points is the maximum if an allocator is fastest on
+all benchmarks.

-![bench-r5a-rss-1](doc/bench-r5a-rss-1.svg)
-![bench-r5a-rss-1](doc/bench-r5a-rss-2.svg)
+The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of
+continued fraction factorization which uses many small short-lived allocations.
+All allocators do well on such common usage, where _mimalloc_ is just a tad
+faster than _tcmalloc_ and
+_jemalloc_.

-(note: the _xmalloc-testN_ memory usage should be disregarded as it
-allocates more the faster the program runs).
-
-In the first five benchmarks we can see _mimalloc_ outperforms the other
-allocators moderately, but we also see that all these modern allocators
-perform well -- the times of large performance differences in regular
-workloads are over :-).
-In _cfrac_ and _espresso_, _mimalloc_ is a tad faster than _tcmalloc_ and
-_jemalloc_, but a solid 10\% faster than all other allocators on
-_espresso_. The _tbb_ allocator does not do so well here and lags more than
-20\% behind _mimalloc_. The _cfrac_ and _espresso_ programs do not use much
-memory (~1.5MB) so it does not matter too much, but still _mimalloc_ uses
-about half the resident memory of _tcmalloc_.

-The _leanN_ program is most interesting as a large realistic and
-concurrent workload of the [Lean](https://github.com/leanprover/lean) theorem prover
-compiling its own standard library, and there is a 8% speedup over _tcmalloc_. This is
+The _leanN_ program is interesting as a large realistic and
+concurrent workload of the [Lean](https://github.com/leanprover/lean)
+theorem prover compiling its own standard library, and there is a 7%
+speedup over _tcmalloc_. This is
 quite significant: if Lean spends 20% of its time in the
 allocator that means that _mimalloc_ is 1.3× faster than _tcmalloc_
 here. (This is surprising as that is not measured in a pure
@@ -367,19 +390,23 @@ outsized improvement here because _mimalloc_ has better locality in
 the allocation which improves performance for the *other* computations
 in a program as well).

-The _redis_ benchmark shows more differences between the allocators where
-_mimalloc_ is 14\% faster than _jemalloc_. On this benchmark _tbb_ (and _Hoard_) do
-not do well and are over 40\% slower.
+The single threaded _redis_ benchmark again shows that most allocators do well on such workloads where _tcmalloc_
+did best this time.

-The _larson_ server workload allocates and frees objects between
-many threads. Larson and Krishnan \[2] observe this
-behavior (which they call _bleeding_) in actual server applications, and the
-benchmark simulates this.
-Here, _mimalloc_ is more than 2.5× faster than _tcmalloc_ and _jemalloc_
-due to the object migration between different threads. This is a difficult
-benchmark for other allocators too where _mimalloc_ is still 48% faster than the next
-fastest (_snmalloc_).
+The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees objects between threads. They observed this
+behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this.
+Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads.

+The _mstressN_ workload performs many allocations and re-allocations,
+and migrates objects between threads (as in _larsonN_). However, it also
+creates and destroys the _N_ worker threads a few times keeping some objects
+alive beyond the lifetime of the allocating thread. We observed this
+behavior in many larger server applications.

+The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark
+by Mattias Jansson is an allocator test originally designed
+for _rpmalloc_, and tries to simulate realistic allocation patterns over
+multiple threads. Here the differences between allocators become more apparent.

 The second benchmark set tests specific aspects of the allocators and
 shows even more extreme differences between them.
@@ -388,46 +415,62 @@ The _alloc-test_, by
 [OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of
 allocations in various size classes. The test is scaled such that when an
 allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it
-means that it scales linearly. Here, _tcmalloc_, _snmalloc_, and
-_Hoard_ seem to scale less well and do more than 10% worse on the
-multi-core version. Even the best allocators (_tcmalloc_ and _jemalloc_) are
-more than 10% slower as _mimalloc_ here.
+means that it scales linearly. Here, _tcmalloc_ and
+_Hoard_ seem to scale less well and do more than 10% worse on the multi-core version. Even the best industrial
+allocators (_tcmalloc_, _jemalloc_, and _tbb_) are more than 10% slower than _mimalloc_ here.

 The _sh6bench_ and _sh8bench_ benchmarks are developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap.
 In _sh6bench_ _mimalloc_ does much
-better than the others (more than 2× faster than _jemalloc_).
+better than the others (more than 1.5× faster than _jemalloc_).
 We cannot explain this well but believe it is
 caused in part by the "reverse" free-ing pattern in _sh6bench_.
-Again in _sh8bench_ the _mimalloc_ allocator handles object migration
-between threads much better and is over 36% faster than the next best
-allocator, _snmalloc_. Whereas _tcmalloc_ did well on _sh6bench_, the
-addition of object migration caused it to be almost 3 times slower
-than before.
+The _sh8bench_ benchmark is a variation with object migration
+between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10× slower than before.
-The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder,
-simulates an asymmetric workload where
-some threads only allocate, and others only free. The _snmalloc_
-allocator was especially developed to handle this case well as it
-often occurs in concurrent message passing systems (like the [Pony] language
-for which _snmalloc_ was initially developed). Here we see that
+The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, simulates an asymmetric workload where
+some threads only allocate, and others only free -- they observed this pattern in
+larger server applications. Here we see that
 the _mimalloc_ technique of having non-contended sharded thread free
-lists pays off as it even outperforms _snmalloc_ here.
-Only _jemalloc_ also handles this reasonably well, while the
-others underperform by a large margin.
+lists pays off as it outperforms others by a very large margin. Only _rpmalloc_ and _tbb_ also scale well on this benchmark.

-The _cache-scratch_ benchmark by Emery Berger \[1], and introduced with the Hoard
-allocator to test for _passive-false_ sharing of cache lines. With a single thread they all
+The _cache-scratch_ benchmark by Emery Berger \[1] was introduced with
+the Hoard allocator to test for _passive-false_ sharing of cache lines.
+With a single thread they all
 perform the same, but when running with multiple threads the potential allocator
-induced false sharing of the cache lines causes large run-time
-differences, where _mimalloc_ is more than 18× faster than _jemalloc_ and
-_tcmalloc_! Crundal \[6] describes in detail why the false cache line
-sharing occurs in the _tcmalloc_ design, and also discusses how this
+induced false sharing of the cache lines can cause large run-time differences.
+Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this
 can be avoided with some small implementation changes.
-Only _snmalloc_ and _tbb_ also avoid the
-cache line sharing like _mimalloc_. Kukanov and Voss \[7] describe in detail
+Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the
+cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate
+the effects. Kukanov and Voss \[7] describe in detail
 how the design of _tbb_ avoids the false cache line sharing.

+## On 24-core AMD Epyc
+
+For completeness, here are the results on an
+[r5a.12xlarge](https://aws.amazon.com/ec2/instance-types/#Memory_Optimized) instance
+having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory.
+The results are similar to the Intel results but it is interesting to
+see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks.
+
+
+
+## Peak Working Set
+
+The following figure shows the peak working set (rss) of the allocators
+on the benchmarks (on the c5.18xlarge instance).
+
+
+
+Note that the _xmalloc-testN_ memory usage should be disregarded as it
+allocates more memory the faster the program runs. Similarly, memory usage of
+_mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and
+speed. Nevertheless, even though _mimalloc_ is fast on these benchmarks we
+believe the memory usage is too high and hope to improve this.

 # References

@@ -437,14 +480,12 @@ how the design of _tbb_ avoids the false cache line sharing.
 the Ninth International Conference on Architectural Support
 for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000.
 [pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf)
-
-- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_. In ISMM, Vancouver, B.C., Canada, 1998.
-  [pdf](http://citeseer.ist.psu.edu/viewdoc/download;jsessionid=5F0BFB4F57832AEB6C11BF8257271088?doi=10.1.1.45.1947&rep=rep1&type=pdf)
+- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_.
+  In ISMM, Vancouver, B.C., Canada, 1998. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf)

 - \[3] D. Grunwald, B. Zorn, and R. Henderson.
   _Improving the cache locality of memory allocation_. In R. Cartwright, editor,
-  Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993.
-  [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf)
+  Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf)

 - \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986.
@@ -452,17 +493,22 @@ how the design of _tbb_ avoids the false cache line sharing.
 In USENIX Annual Technical Conference, Freenix Session.
 San Diego, CA. Jun. 2000.
 Available at

-- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc._
-  2016. . CS16S1 project at the Australian National University.
+- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University.
  [pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf)

 - \[7] Alexey Kukanov, and Michael J Voss.
   _The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._
   Intel Technology Journal 11 (4). 2007

-- \[8] Paul Liétar, Theodore Butler, Sylvan Clebsch, Sophia Drossopoulou, Juliana Franco, Matthew J Parkinson,
+- \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor.
+  _Mesh: Compacting Memory Management for C/C++_.
+  In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333–346.
+
+
 # Contributing

diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 5a59a63a..55b0e041 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -79,7 +79,7 @@ mi_decl_allocator void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, siz
 mi_decl_allocator void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(count, size, &total)) return NULL;
+  if (mi_count_size_overflow(count, size, &total)) return NULL;
   return mi_heap_zalloc_aligned_at(heap, total, alignment, offset);
 }

@@ -168,13 +168,13 @@ mi_decl_allocator void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_
 mi_decl_allocator void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(newcount, size, &total)) return NULL;
+  if (mi_count_size_overflow(newcount, size, &total)) return NULL;
   return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset);
 }

 mi_decl_allocator void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(newcount, size, &total)) return NULL;
+  if (mi_count_size_overflow(newcount, size, &total)) return NULL;
   return mi_heap_rezalloc_aligned(heap, p, total, alignment);
 }

diff --git a/src/alloc-override.c b/src/alloc-override.c
index 002374bb..89c5126a 100644
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@@ -98,7 +98,7 @@ terms of the MIT license. A copy of the license can be found in the file
   void operator delete[](void* p, std::size_t n) MI_FORWARD02(mi_free_size,p,n);
   #endif

-  #if (__cplusplus > 201402L || defined(__cpp_aligned_new))
+  #if (__cplusplus > 201402L || defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5))
   void operator delete  (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
   void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
   void operator delete  (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };

diff --git a/src/alloc.c b/src/alloc.c
index e68b48d2..3f577f2f 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -21,8 +21,8 @@ terms of the MIT license. A copy of the license can be found in the file

 // Fast allocation in a page: just pop from the free list.
 // Fall back to generic allocation only if the list is empty.
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
-  mi_assert_internal(page->block_size==0||page->block_size >= size);
+extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
+  mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
   mi_block_t* block = page->free;
   if (mi_unlikely(block == NULL)) {
     return _mi_malloc_generic(heap, size); // slow path
@@ -92,18 +92,18 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept {
 void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
   // note: we need to initialize the whole block to zero, not just size
   // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
-  UNUSED(size);
+  UNUSED_RELEASE(size);
   mi_assert_internal(p != NULL);
-  mi_assert_internal(size > 0 && page->block_size >= size);
+  mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero
   mi_assert_internal(_mi_ptr_page(p)==page);
   if (page->is_zero) {
     // already zero initialized memory?
     ((mi_block_t*)p)->next = 0;  // clear the free list pointer
-    mi_assert_expensive(mi_mem_is_zero(p,page->block_size));
+    mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page)));
   }
   else {
     // otherwise memset
-    memset(p, 0, page->block_size);
+    memset(p, 0, mi_page_block_size(page));
   }
 }

@@ -125,7 +125,7 @@ mi_decl_allocator void* mi_zalloc(size_t size) mi_attr_noexcept {

 // ------------------------------------------------------
-// Check for double free in secure and debug mode
+// Check for double free in secure and debug mode
 // This is somewhat expensive so only enabled for secure mode 4
 // ------------------------------------------------------

@@ -139,32 +139,28 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons
   return false;
 }

-static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) {
-  size_t psize;
-  uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
-  if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) {
-    // Suspicious: the decoded value is in the same page (or NULL).
-    // Walk the free lists to verify positively if it is already freed
-    if (mi_list_contains(page, page->free, block) ||
-        mi_list_contains(page, page->local_free, block) ||
-        mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block))
-    {
-      _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size);
-      return true;
-    }
+static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
+  // The decoded value is in the same page (or NULL).
+  // Walk the free lists to verify positively if it is already freed
+  if (mi_list_contains(page, page->free, block) ||
+      mi_list_contains(page, page->local_free, block) ||
+      mi_list_contains(page, mi_page_thread_free(page), block))
+  {
+    _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
+    return true;
   }
   return false;
 }

 static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
-  mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field
-  if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 &&  // quick check: aligned pointer?
-      (n==NULL || mi_is_in_same_segment(block, n)))  // quick check: in same segment or NULL?
-  {
-    // Suspicous: decoded value in block is in the same segment (or NULL) -- maybe a double free?
-    // (continue in separate function to improve code generation)
-    return mi_check_is_double_freex(page, block, n);
-  }
+  mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field
+  if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 &&  // quick check: aligned pointer?
+      (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
+  {
+    // Suspicious: decoded value in block is in the same page (or NULL) -- maybe a double free?
+    // (continue in separate function to improve code generation)
+    return mi_check_is_double_freex(page, block);
+  }
   return false;
 }
 #else
@@ -180,44 +176,50 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
 // Free
 // ------------------------------------------------------

+// free huge block from another thread
+static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) {
+  // huge page segments are always abandoned and can be freed immediately
+  mi_assert_internal(segment->page_kind==MI_PAGE_HUGE);
+  mi_assert_internal(segment == _mi_page_segment(page));
+  mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
+
+  // claim it and free
+  mi_heap_t* heap = mi_get_default_heap();
+  // paranoia: if this is the last reference, the cas should always succeed
+  if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) {
+    mi_block_set_next(page, block, page->free);
+    page->free = block;
+    page->used--;
+    page->is_zero = false;
+    mi_assert(page->used == 0);
+    mi_tld_t* tld = heap->tld;
+    const size_t bsize = mi_page_block_size(page);
+    if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
+      _mi_stat_decrease(&tld->stats.giant, bsize);
+    }
+    else {
+      _mi_stat_decrease(&tld->stats.huge, bsize);
+    }
+    _mi_segment_page_free(page, true, &tld->segments);
+  }
+}
+
 // multi-threaded free
 static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
 {
-  mi_thread_free_t tfree;
-  mi_thread_free_t tfreex;
-  bool use_delayed;
-
+  // huge page segments are always abandoned and can be freed immediately
   mi_segment_t* segment = _mi_page_segment(page);
   if (segment->page_kind==MI_PAGE_HUGE) {
-    // huge page segments are always abandoned and can be freed immediately
-    mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
-    mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL);
-    // claim it and free
-    mi_heap_t* heap = mi_get_default_heap();
-    // paranoia: if this it the last reference, the cas should always succeed
-    if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) {
-      mi_block_set_next(page, block, page->free);
-      page->free = block;
-      page->used--;
-      page->is_zero = false;
-      mi_assert(page->used == 0);
-      mi_tld_t* tld = heap->tld;
-      if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
-        _mi_stat_decrease(&tld->stats.giant, page->block_size);
-      }
-      else {
-        _mi_stat_decrease(&tld->stats.huge, page->block_size);
-      }
-      _mi_segment_page_free(page,true,&tld->segments);
-    }
+    mi_free_huge_block_mt(segment, page, block);
     return;
   }

+  mi_thread_free_t tfree;
+  mi_thread_free_t tfreex;
+  bool use_delayed;
   do {
-    tfree = page->thread_free;
-    use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
-                   (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1)  // data-race
but ok, just optimizes early release of the page
-                  );
+    tfree = mi_atomic_read_relaxed(&page->xthread_free);
+    use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
     if (mi_unlikely(use_delayed)) {
       // unlikely: this only happens on the first concurrent free in a page that is in the full list
       tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
@@ -227,31 +229,27 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
       mi_block_set_next(page, block, mi_tf_block(tfree));
       tfreex = mi_tf_set_block(tfree,block);
     }
-  } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree));
+  } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));

-  if (mi_likely(!use_delayed)) {
-    // increment the thread free count and return
-    mi_atomic_increment(&page->thread_freed);
-  }
-  else {
+  if (mi_unlikely(use_delayed)) {
     // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
-    mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
+    mi_heap_t* heap = mi_page_heap(page);
     mi_assert_internal(heap != NULL);
     if (heap != NULL) {
       // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
       mi_block_t* dfree;
       do {
-        dfree = (mi_block_t*)heap->thread_delayed_free;
-        mi_block_set_nextx(heap,block,dfree, heap->cookie);
-      } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
+        dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free);
+        mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]);
+      } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree));
     }

     // and reset the MI_DELAYED_FREEING flag
     do {
-      tfreex = tfree = page->thread_free;
-      mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
-      if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
-    } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree));
+      tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free);
+      mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
+      tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
+    } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
   }
 }

@@ -260,13 +258,13 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
 static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
 {
   #if (MI_DEBUG)
-  memset(block, MI_DEBUG_FREED, page->block_size);
+  memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
   #endif

   // and push it on the free list
   if (mi_likely(local)) {
     // owning thread can free a block directly
-    if (mi_check_is_double_free(page, block)) return;
+    if (mi_unlikely(mi_check_is_double_free(page, block))) return;
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
@@ -287,12 +285,13 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
   mi_assert_internal(page!=NULL && p!=NULL);
   size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
-  size_t adjust = (diff % page->block_size);
+  size_t adjust = (diff % mi_page_block_size(page));
   return (mi_block_t*)((uintptr_t)p - adjust);
 }

-static void mi_decl_noinline
mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool local, void* p) {
+static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) {
+  mi_page_t* page = _mi_segment_page_of(segment, p);
   mi_block_t* block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   _mi_free_block(page, local, block);
 }
@@ -302,7 +301,7 @@ void mi_free(void* p) mi_attr_noexcept
 {
 #if (MI_DEBUG>0)
   if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) {
-    _mi_error_message("trying to free an invalid (unaligned) pointer: %p\n", p);
+    _mi_error_message(EINVAL, "trying to free an invalid (unaligned) pointer: %p\n", p);
     return;
   }
 #endif
@@ -312,16 +311,16 @@ void mi_free(void* p) mi_attr_noexcept
 #if (MI_DEBUG!=0)
   if (mi_unlikely(!mi_is_in_heap_region(p))) {
-    _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: 0x%p\n"
+    _mi_warning_message("possibly trying to free a pointer that does not point to a valid heap region: %p\n"
       "(this may still be a valid very large allocation (over 64MiB))\n", p);
     if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) {
-      _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p);
+      _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
     }
   }
 #endif
 #if (MI_DEBUG!=0 || MI_SECURE>=4)
   if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) {
-    _mi_error_message("trying to free a pointer that does not point to a valid heap space: %p\n", p);
+    _mi_error_message(EINVAL, "trying to free a pointer that does not point to a valid heap space: %p\n", p);
     return;
   }
 #endif
@@ -332,24 +331,27 @@ void mi_free(void* p) mi_attr_noexcept
 #if (MI_STAT>1)
   mi_heap_t* heap = mi_heap_get_default();
   mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
-  if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) {
-    mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1);
+  if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) {
+    mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1);
   }
   // huge page stat is accounted for in `_mi_page_retire`
 #endif

   if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) {  // the thread id matches and it is not a full page, nor has aligned blocks
     // local, and not full or aligned
-    mi_block_t* block = (mi_block_t*)p;
-    if (mi_check_is_double_free(page,block)) return;
+    mi_block_t* const block = (mi_block_t*)p;
+    if (mi_unlikely(mi_check_is_double_free(page,block))) return;
     mi_block_set_next(page, block, page->local_free);
     page->local_free = block;
     page->used--;
-    if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
+    if (mi_unlikely(mi_page_all_free(page))) {
+      _mi_page_retire(page);
+    }
   }
   else {
     // non-local, aligned blocks, or a full page; use the more generic path
-    mi_free_generic(segment, page, tid == segment->thread_id, p);
+    // note: recalc page in generic to improve code generation
+    mi_free_generic(segment, tid == segment->thread_id, p);
   }
 }

@@ -359,13 +361,19 @@ bool _mi_free_delayed_block(mi_block_t* block) {
   mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
   mi_assert_internal(_mi_thread_id() == segment->thread_id);
   mi_page_t* page = _mi_segment_page_of(segment, block);
-  if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) {
-    // we might already start delayed freeing while another thread has not yet
-    // reset the delayed_freeing flag; in that case don't free it quite yet if
-    // this is the last block remaining.
-    if (page->used - page->thread_freed == 1) return false;
-  }
-  _mi_free_block(page,true,block);
+
+  // Clear the no-delayed flag so delayed freeing is used again for this page.
+  // This must be done before collecting the free lists on this page -- otherwise
+  // some blocks may end up in the page `thread_free` list with no blocks in the
+  // heap `thread_delayed_free` list which may cause the page to be never freed!
+  // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
+  _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* don't overwrite never delayed */);
+
+  // collect all other non-local frees to ensure up-to-date `used` count
+  _mi_page_free_collect(page, false);
+
+  // and free the block (possibly freeing the page as well since used is updated)
+  _mi_free_block(page, true, block);
   return true;
 }

@@ -374,7 +382,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
   if (p==NULL) return 0;
   const mi_segment_t* segment = _mi_ptr_segment(p);
   const mi_page_t* page = _mi_segment_page_of(segment,p);
-  size_t size = page->block_size;
+  size_t size = mi_page_block_size(page);
   if (mi_unlikely(mi_page_has_aligned(page))) {
     ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
     mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
@@ -426,7 +434,7 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept {
 extern inline mi_decl_allocator void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(count,size,&total)) return NULL;
+  if (mi_count_size_overflow(count,size,&total)) return NULL;
   return mi_heap_zalloc(heap,total);
 }

@@ -437,7 +445,7 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
 // Uninitialized `calloc`
 extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(count,size,&total)) return NULL;
+  if (mi_count_size_overflow(count, size, &total)) return NULL;
   return mi_heap_malloc(heap, total);
 }

@@ -478,7 +486,7 @@ mi_decl_allocator void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize
 mi_decl_allocator void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(count, size, &total)) return NULL;
+  if (mi_count_size_overflow(count, size, &total)) return NULL;
   return mi_heap_realloc(heap, p, total);
 }

@@ -496,7 +504,7 @@ mi_decl_allocator void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsiz
 mi_decl_allocator void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
   size_t total;
-  if (mi_mul_overflow(count, size, &total)) return NULL;
+  if (mi_count_size_overflow(count, size, &total)) return NULL;
   return mi_heap_rezalloc(heap, p, total);
 }

@@ -564,7 +572,6 @@ char* mi_strndup(const char* s, size_t n) mi_attr_noexcept {
 #define PATH_MAX MAX_PATH
 #endif
 #include <windows.h>
-#include <errno.h>
 char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
   // todo: use GetFullPathNameW to allow longer file names
   char buf[PATH_MAX];
@@ -639,10 +646,6 @@ static bool mi_try_new_handler(bool nothrow) {
   }
 }
 #else
-#include <errno.h>
-#ifndef ENOMEM
-#define ENOMEM 12
-#endif
 typedef void (*std_new_handler_t)();

 #if (defined(__GNUC__) || defined(__clang__))
mi_get_new_handler();
   if (h==NULL) {
-    if (!nothrow) exit(ENOMEM);
+    if (!nothrow) exit(ENOMEM);  // cannot throw in plain C, use exit as we are out of memory anyway.
     return false;
   }
   else {
@@ -672,36 +675,70 @@ static bool mi_try_new_handler(bool nothrow) {
 }
 #endif

-static mi_decl_noinline void* mi_try_new(size_t n, bool nothrow ) {
+static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) {
   void* p = NULL;
   while(p == NULL && mi_try_new_handler(nothrow)) {
-    p = mi_malloc(n);
+    p = mi_malloc(size);
   }
   return p;
 }

-void* mi_new(size_t n) {
-  void* p = mi_malloc(n);
-  if (mi_unlikely(p == NULL)) return mi_try_new(n,false);
+void* mi_new(size_t size) {
+  void* p = mi_malloc(size);
+  if (mi_unlikely(p == NULL)) return mi_try_new(size,false);
   return p;
 }

-void* mi_new_aligned(size_t n, size_t alignment) {
+void* mi_new_nothrow(size_t size) {
+  void* p = mi_malloc(size);
+  if (mi_unlikely(p == NULL)) return mi_try_new(size, true);
+  return p;
+}
+
+void* mi_new_aligned(size_t size, size_t alignment) {
   void* p;
-  do { p = mi_malloc_aligned(n, alignment); }
+  do {
+    p = mi_malloc_aligned(size, alignment);
+  }
   while(p == NULL && mi_try_new_handler(false));
   return p;
 }

-void* mi_new_nothrow(size_t n) {
-  void* p = mi_malloc(n);
-  if (mi_unlikely(p == NULL)) return mi_try_new(n,true);
+void* mi_new_aligned_nothrow(size_t size, size_t alignment) {
+  void* p;
+  do {
+    p = mi_malloc_aligned(size, alignment);
+  }
+  while(p == NULL && mi_try_new_handler(true));
   return p;
 }

-void* mi_new_aligned_nothrow(size_t n, size_t alignment) {
-  void* p;
-  do { p = mi_malloc_aligned(n, alignment); }
-  while (p == NULL && mi_try_new_handler(true));
-  return p;
+void* mi_new_n(size_t count, size_t size) {
+  size_t total;
+  if (mi_unlikely(mi_count_size_overflow(count, size, &total))) {
+    mi_try_new_handler(false);  // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
+    return NULL;
+  }
+  else {
+    return mi_new(total);
+  }
+}
+
+void* mi_new_realloc(void* p, size_t newsize) {
+  void* q;
+  do {
+    q = mi_realloc(p, newsize);
+  } while (q == NULL && mi_try_new_handler(false));
+  return q;
+}
+
+void* mi_new_reallocn(void* p, size_t newcount, size_t size) {
+  size_t total;
+  if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) {
+    mi_try_new_handler(false);  // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
+    return NULL;
+  }
+  else {
+    return mi_new_realloc(p, total);
+  }
 }

diff --git a/src/arena.c b/src/arena.c
new file mode 100644
index 00000000..7bf8099b
--- /dev/null
+++ b/src/arena.c
@@ -0,0 +1,356 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+"Arenas" are fixed areas of OS memory from which we can allocate
+large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
+In contrast to the rest of mimalloc, the arenas are shared between
+threads and need to be accessed using atomic operations.
+
+Currently arenas are only used for huge OS page (1GiB) reservations,
+otherwise it delegates to direct allocation from the OS.
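+(For instance, a single 1GiB huge-page reservation is managed as 32 arena blocks
+of 32MiB each -- an illustrative note, not part of the original comment.)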
+In the future, we can expose an API to manually add more kinds of arenas
+which is sometimes needed for embedded devices or shared memory for example.
+(We can also employ this with WASI or `sbrk` systems to reserve large arenas
+ on demand and be able to reuse them efficiently).
+
+The arena allocation needs to be thread safe and we use an atomic
+bitmap to allocate. The current implementation of the bitmap can
+only do this within a field (`uintptr_t`) so we can allocate at most
+blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
+can lead to fragmentation but fortunately most objects will be regions
+of 256MiB in practice.
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // memset
+
+#include "bitmap.inc.c"  // atomic bitmap
+
+
+// os.c
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
+void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
+
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
+void  _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
+
+bool  _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+
+/* -----------------------------------------------------------
+  Arena allocation
+----------------------------------------------------------- */
+
+#define MI_SEGMENT_ALIGN      MI_SEGMENT_SIZE
+#define MI_ARENA_BLOCK_SIZE   (8*MI_SEGMENT_ALIGN)                       // 32MiB
+#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE)  // 2GiB
+#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)                    // 16MiB
+#define MI_MAX_ARENAS         (64)                                       // not more than 256 (since we use 8 bits in the memid)
+
+// A memory arena descriptor
+typedef struct mi_arena_s {
+  _Atomic(uint8_t*) start;                // the start of the memory area
+  size_t   block_count;                   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+  size_t   field_count;                   // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
+  int      numa_node;                     // associated NUMA node
+  bool     is_zero_init;                  // is the arena zero initialized?
+  bool     is_committed;                  // is the memory committed
+  bool     is_large;                      // large OS page allocated
+  volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
+  mi_bitmap_field_t* blocks_dirty;        // are the blocks potentially non-zero?
+  mi_bitmap_field_t* blocks_committed;    // if `!is_committed`, are the blocks committed?
+  mi_bitmap_field_t  blocks_inuse[1];     // in-place bitmap of in-use blocks (of size `field_count`)
+} mi_arena_t;
+
+
+// The available arenas
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0
+
+
+/* -----------------------------------------------------------
+  Arena allocations get a memory id where the lower 8 bits are
+  the arena index +1, and the upper bits the block index.
+----------------------------------------------------------- */
+
+// Use `0` as a special id for direct OS allocated memory.
+#define MI_MEMID_OS   0
+
+static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
+  mi_assert_internal(arena_index < 0xFE);
+  mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
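+  // illustrative note (not in the original patch): arena_index 0 with bitmap_index 5
+  // encodes as (5 << 8) | 1 == 0x501; mi_arena_id_indices decodes this back to (0, 5).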
+  return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
+}
+
+static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
+  mi_assert_internal(memid != MI_MEMID_OS);
+  *arena_index = (memid & 0xFF) - 1;
+  *bitmap_index = (memid >> 8);
+}
+
+static size_t mi_block_count_of_size(size_t size) {
+  return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
+}
+
+/* -----------------------------------------------------------
+  Thread safe allocation in an arena
+----------------------------------------------------------- */
+static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
+{
+  const size_t fcount = arena->field_count;
+  size_t idx = mi_atomic_read(&arena->search_idx);  // start from last search
+  for (size_t visited = 0; visited < fcount; visited++, idx++) {
+    if (idx >= fcount) idx = 0;  // wrap around
+    // try to atomically claim a range of bits
+    if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
+      mi_atomic_write(&arena->search_idx, idx);  // start search from here next time
+      return true;
+    }
+  }
+  return false;
+}
+
+
+/* -----------------------------------------------------------
+  Arena Allocation
+----------------------------------------------------------- */
+
+static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
+                                 bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  mi_bitmap_index_t bitmap_index;
+  if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
+
+  // claimed it! set the dirty bits (todo: no need for an atomic op here?)
+  void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
+  *memid = mi_arena_id_create(arena_index, bitmap_index);
+  *is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
+  *large = arena->is_large;
+  if (arena->is_committed) {
+    // always committed
+    *commit = true;
+  }
+  else if (*commit) {
+    // arena not committed as a whole, but commit requested: ensure commit now
+    bool any_uncommitted;
+    mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
+    if (any_uncommitted) {
+      bool commit_zero;
+      _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
+      if (commit_zero) *is_zero = true;
+    }
+  }
+  else {
+    // no need to commit, but check if already fully committed
+    *commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+  }
+  return p;
+}
+
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
+                              bool* commit, bool* large, bool* is_zero,
+                              size_t* memid, mi_os_tld_t* tld)
+{
+  mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL);
+  mi_assert_internal(size > 0);
+  *memid   = MI_MEMID_OS;
+  *is_zero = false;
+
+  // try to allocate in an arena if the alignment is small enough
+  // and the object is not too large or too small.
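+  // (illustrative note, not in the original patch: i.e. 16MiB <= size <= 2GiB with at most
+  //  segment alignment (4MiB); anything else falls through to the OS allocation below)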
+  if (alignment <= MI_SEGMENT_ALIGN &&
+      size <= MI_ARENA_MAX_OBJ_SIZE &&
+      size >= MI_ARENA_MIN_OBJ_SIZE)
+  {
+    const size_t bcount = mi_block_count_of_size(size);
+    const int numa_node = _mi_os_numa_node(tld); // current numa node
+
+    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+    // try numa affine allocation
+    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
+      mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+      if (arena==NULL) break; // end reached
+      if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
+          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
+      }
+    }
+    // try from another numa node instead..
+    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
+      mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+      if (arena==NULL) break; // end reached
+      if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
+          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+      {
+        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
+        mi_assert_internal((uintptr_t)p % alignment == 0);
+        if (p != NULL) return p;
+      }
+    }
+  }
+
+  // finally, fall back to the OS
+  *is_zero = true;
+  *memid   = MI_MEMID_OS;
+  return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
+}
+
+void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld);
+}
+
+/* -----------------------------------------------------------
+  Arena free
+----------------------------------------------------------- */
+
+void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
+  mi_assert_internal(size > 0 && stats != NULL);
+  if (p==NULL) return;
+  if (size==0) return;
+  if (memid == MI_MEMID_OS) {
+    // was a direct OS allocation, pass through
+    _mi_os_free(p, size, stats);
+  }
+  else {
+    // allocated in an arena
+    size_t arena_idx;
+    size_t bitmap_idx;
+    mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
+    mi_assert_internal(arena_idx < MI_MAX_ARENAS);
+    mi_arena_t* arena = mi_atomic_read_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]);
+    mi_assert_internal(arena != NULL);
+    if (arena == NULL) {
+      _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
+      return;
+    }
+    mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
+    if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
+      _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
+      return;
+    }
+    const size_t blocks = mi_block_count_of_size(size);
+    bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
+    if (!ones) {
+      _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size);
+      return;
+    };
+  }
+}
+
+/* -----------------------------------------------------------
+  Add an arena.
+----------------------------------------------------------- */
+
+static bool mi_arena_add(mi_arena_t* arena) {
+  mi_assert_internal(arena != NULL);
+  mi_assert_internal((uintptr_t)mi_atomic_read_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
+  mi_assert_internal(arena->block_count > 0);
+
+  uintptr_t i = mi_atomic_increment(&mi_arena_count);
+  if (i >= MI_MAX_ARENAS) {
+    mi_atomic_decrement(&mi_arena_count);
+    return false;
+  }
+  mi_atomic_write_ptr(mi_arena_t,&mi_arenas[i], arena);
+  return true;
+}
+
+
+/* -----------------------------------------------------------
+  Reserve a huge page arena.
+----------------------------------------------------------- */
+#include <errno.h> // ENOMEM
+
+// reserve at a specific numa node
+int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
+  if (pages==0) return 0;
+  if (numa_node < -1) numa_node = -1;
+  if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
+  size_t hsize = 0;
+  size_t pages_reserved = 0;
+  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
+  if (p==NULL || pages_reserved==0) {
+    _mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
+    return ENOMEM;
+  }
+  _mi_verbose_message("reserved %zu gb huge pages on numa node %i (of the %zu gb requested)\n", pages_reserved, numa_node, pages);
+
+  size_t bcount = mi_block_count_of_size(hsize);
+  size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
+  size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
+  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
+  if (arena == NULL) {
+    _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
+    return ENOMEM;
+  }
+  arena->block_count = bcount;
+  arena->field_count = fields;
+  arena->start = (uint8_t*)p;
+  arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
+  arena->is_large = true;
+  arena->is_zero_init = true;
+  arena->is_committed = true;
+  arena->search_idx = 0;
+  arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
+  arena->blocks_committed = NULL;
+  // the bitmaps are already zero initialized due to os_alloc
+  // just claim leftover blocks if needed
+  ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
+  mi_assert_internal(post >= 0);
+  if (post > 0) {
+    // don't use leftover bits at the end
+    mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
+    mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
+  }
+
+  mi_arena_add(arena);
+  return 0;
+}
+
+
+// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
+int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
+  if (pages == 0) return 0;
+
+  // pages per numa node
+  size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
+  if (numa_count <= 0) numa_count = 1;
+  const size_t pages_per = pages / numa_count;
+  const size_t pages_mod = pages % numa_count;
+  const size_t timeout_per = (timeout_msecs==0 ?
0 : (timeout_msecs / numa_count) + 50);
+
+  // reserve evenly among numa nodes
+  for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+    size_t node_pages = pages_per;  // can be 0
+    if (numa_node < pages_mod) node_pages++;
+    int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
+    if (err) return err;
+    if (pages < node_pages) {
+      pages = 0;
+    }
+    else {
+      pages -= node_pages;
+    }
+  }
+
+  return 0;
+}
+
+int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
+  UNUSED(max_secs);
+  _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
+  if (pages_reserved != NULL) *pages_reserved = 0;
+  int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
+  if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
+  return err;
+}

diff --git a/src/bitmap.inc.c b/src/bitmap.inc.c
new file mode 100644
index 00000000..c3813a44
--- /dev/null
+++ b/src/bitmap.inc.c
@@ -0,0 +1,240 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+This file is meant to be included in other files for efficiency.
+It implements a bitmap that can set/reset sequences of bits atomically
+and is used to concurrently claim memory ranges.
+
+A bitmap is an array of fields where each field is a machine word (`uintptr_t`).
+
+A current limitation is that the bit sequences cannot cross fields
+and that the sequence must be smaller than or equal to the bits in a field.
+---------------------------------------------------------------------------- */
+#pragma once
+#ifndef MI_BITMAP_C
+#define MI_BITMAP_C
+
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+
+/* -----------------------------------------------------------
+  Bitmap definition
+----------------------------------------------------------- */
+
+#define MI_BITMAP_FIELD_BITS   (8*MI_INTPTR_SIZE)
+#define MI_BITMAP_FIELD_FULL   (~((uintptr_t)0))   // all bits set
+
+// An atomic bitmap of `uintptr_t` fields
+typedef volatile _Atomic(uintptr_t)  mi_bitmap_field_t;
+typedef mi_bitmap_field_t*           mi_bitmap_t;
+
+// A bitmap index is the index of the bit in a bitmap.
+typedef size_t mi_bitmap_index_t;
+
+// Create a bit index.
+static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
+  mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
+  return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
+}
+
+// Get the field index from a bit index.
+static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
+  return (bitmap_idx / MI_BITMAP_FIELD_BITS);
+}
+
+// Get the bit index in a bitmap field
+static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
+  return (bitmap_idx % MI_BITMAP_FIELD_BITS);
+}
+
+// Get the full bit index
+static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
+  return bitmap_idx;
+}
+
+
+// The bit mask for a given number of blocks at a specified bit index.
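+// For example (an illustrative note, not part of the original patch):
+// mi_bitmap_mask_(3, 4) == 0x70, i.e. three 1-bits starting at bit 4.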
+static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
+  mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
+  if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
+  return ((((uintptr_t)1 << count) - 1) << bitidx);
+}
+
+
+/* -----------------------------------------------------------
+  Use bit scan forward/reverse to quickly find the first zero bit if it is available
+----------------------------------------------------------- */
+#if defined(_MSC_VER)
+#define MI_HAVE_BITSCAN
+#include <intrin.h>
+static inline size_t mi_bsf(uintptr_t x) {
+  if (x==0) return 8*MI_INTPTR_SIZE;
+  DWORD idx;
+  MI_64(_BitScanForward)(&idx, x);
+  return idx;
+}
+static inline size_t mi_bsr(uintptr_t x) {
+  if (x==0) return 8*MI_INTPTR_SIZE;
+  DWORD idx;
+  MI_64(_BitScanReverse)(&idx, x);
+  return idx;
+}
+#elif defined(__GNUC__) || defined(__clang__)
+#include <limits.h> // LONG_MAX
+#define MI_HAVE_BITSCAN
+#if (INTPTR_MAX == LONG_MAX)
+# define MI_L(x)  x##l
+#else
+# define MI_L(x)  x##ll
+#endif
+static inline size_t mi_bsf(uintptr_t x) {
+  return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
+}
+static inline size_t mi_bsr(uintptr_t x) {
+  return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
+}
+#endif
+
+/* -----------------------------------------------------------
+  Claim a bit sequence atomically
+----------------------------------------------------------- */
+
+// Try to atomically claim a sequence of `count` bits at `bitmap_idx`
+// in the bitmap field. Returns `true` on success.
+static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) {
+  const size_t idx = mi_bitmap_index_field(bitmap_idx);
+  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
+  const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
+  mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
+  mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS);
+
+  uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]);
+  if ((field & mask) == 0) { // free?
+    if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) {
+      // claimed!
+      return true;
+    }
+  }
+  return false;
+}
+
+
+// Try to atomically claim a sequence of `count` bits in a single
+// field at `idx` in `bitmap`. Returns `true` on success.
+static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
+{
+  mi_assert_internal(bitmap_idx != NULL);
+  volatile _Atomic(uintptr_t)* field = &bitmap[idx];
+  uintptr_t map = mi_atomic_read(field);
+  if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
+
+  // search for 0-bit sequence of length count
+  const uintptr_t mask = mi_bitmap_mask_(count, 0);
+  const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
+
+#ifdef MI_HAVE_BITSCAN
+  size_t bitidx = mi_bsf(~map);    // quickly find the first zero bit if possible
+#else
+  size_t bitidx = 0;               // otherwise start at 0
+#endif
+  uintptr_t m = (mask << bitidx);  // invariant: m == mask shifted by bitidx
+
+  // scan linearly for a free range of zero bits
+  while (bitidx <= bitidx_max) {
+    if ((map & m) == 0) {  // are the mask bits free at bitidx?
+      mi_assert_internal((m >> bitidx) == mask); // no overflow?
+      const uintptr_t newmap = map | m;
+      mi_assert_internal((newmap^map) >> bitidx == mask);
+      if (!mi_atomic_cas_weak(field, newmap, map)) {  // TODO: use strong cas here?
+        // no success, another thread claimed concurrently..
keep going + map = mi_atomic_read(field); + continue; + } + else { + // success, we claimed the bits! + *bitmap_idx = mi_bitmap_index_create(idx, bitidx); + return true; + } + } + else { + // on to the next bit range +#ifdef MI_HAVE_BITSCAN + const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); + mi_assert_internal(shift > 0 && shift <= count); +#else + const size_t shift = 1; +#endif + bitidx += shift; + m <<= shift; + } + } + // no bits found + return false; +} + + +// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. +// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. +static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) { + for (size_t idx = 0; idx < bitmap_fields; idx++) { + if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + return true; + } + } + return false; +} + +// Set `count` bits at `bitmap_idx` to 0 atomically +// Returns `true` if all `count` bits were 1 previously. +static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + // mi_assert_internal((bitmap[idx] & mask) == mask); + uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask); + return ((prev & mask) == mask); +} + + +// Set `count` bits at `bitmap_idx` to 1 atomically +// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. +static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); + uintptr_t prev = mi_atomic_or(&bitmap[idx], mask); + if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + return ((prev & mask) == 0); +} + +// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one. 
+static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const uintptr_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + uintptr_t field = mi_atomic_read_relaxed(&bitmap[idx]); + if (any_ones != NULL) *any_ones = ((field & mask) != 0); + return ((field & mask) == mask); +} + +static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); +} + +static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + bool any_ones; + mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + return any_ones; +} + + +#endif diff --git a/src/heap.c b/src/heap.c index daa9b241..e76a147c 100644 --- a/src/heap.c +++ b/src/heap.c @@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); count++; if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue @@ -45,21 +45,22 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void } -#if MI_DEBUG>1 -static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { +#if MI_DEBUG>=2 +static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { UNUSED(arg1); UNUSED(arg2); UNUSED(pq); - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); mi_assert_expensive(_mi_page_is_valid(page)); return true; } - +#endif +#if MI_DEBUG>=3 static bool mi_heap_is_valid(mi_heap_t* heap) { mi_assert_internal(heap!=NULL); - mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); + mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL); return true; } #endif @@ -75,22 +76,24 @@ static bool mi_heap_is_valid(mi_heap_t* heap) { ----------------------------------------------------------- */ typedef enum mi_collect_e { - NORMAL, - FORCE, - ABANDON + MI_NORMAL, + MI_FORCE, + MI_ABANDON } mi_collect_t; static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { UNUSED(arg2); UNUSED(heap); + mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); - _mi_page_free_collect(page, collect >= ABANDON); + _mi_page_free_collect(page, collect >= MI_FORCE); if (mi_page_all_free(page)) { - // no more used blocks, free the page. TODO: should we retire here and be less aggressive? - _mi_page_free(page, pq, collect != NORMAL); + // no more used blocks, free the page. + // note: this will free retired pages as well. 
+ _mi_page_free(page, pq, collect >= MI_FORCE); } - else if (collect == ABANDON) { + else if (collect == MI_ABANDON) { // still used blocks but the thread is done; abandon the page _mi_page_abandon(page, pq); } @@ -102,63 +105,62 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq UNUSED(arg2); UNUSED(heap); UNUSED(pq); - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (!mi_heap_is_initialized(heap)) return; - _mi_deferred_free(heap, collect > NORMAL); - - // collect (some) abandoned pages - if (collect >= NORMAL && !heap->no_reclaim) { - if (collect == NORMAL) { - // this may free some segments (but also take ownership of abandoned pages) - _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); - } - #if MI_DEBUG - else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - // the main thread is abandoned, try to free all abandoned segments. - // if all memory is freed by now, all segments should be freed. - _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); - } - #endif + _mi_deferred_free(heap, collect >= MI_FORCE); + + // note: never reclaim on collect but leave it to threads that need storage to reclaim + if ( + #ifdef NDEBUG + collect == MI_FORCE + #else + collect >= MI_FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim) + { + // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. + // if all memory is freed by now, all segments should be freed. + _mi_abandoned_reclaim_all(heap, &heap->tld->segments); } + // if abandoning, mark all pages to no longer add to delayed_free - if (collect == ABANDON) { - //for (mi_page_t* page = heap->pages[MI_BIN_FULL].first; page != NULL; page = page->next) { - // _mi_page_use_delayed_free(page, false); // set thread_free.delayed to MI_NO_DELAYED_FREE - //} + if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } - // free thread delayed blocks. - // (if abandoning, after this there are no more local references into the pages.) + // free thread delayed blocks. + // (if abandoning, after this there are no more thread-delayed references into the pages.) _mi_heap_delayed_free(heap); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - mi_assert_internal( collect != ABANDON || heap->thread_delayed_free == NULL ); - + mi_assert_internal( collect != MI_ABANDON || mi_atomic_read_ptr(mi_block_t,&heap->thread_delayed_free) == NULL ); + // collect segment caches - if (collect >= FORCE) { + if (collect >= MI_FORCE) { _mi_segment_thread_collect(&heap->tld->segments); } + #ifndef NDEBUG // collect regions - if (collect >= FORCE && _mi_is_main_thread()) { - _mi_mem_collect(&heap->tld->stats); + if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + _mi_mem_collect(&heap->tld->os); } + #endif } void _mi_heap_collect_abandon(mi_heap_t* heap) { - mi_heap_collect_ex(heap, ABANDON); + mi_heap_collect_ex(heap, MI_ABANDON); } void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { - mi_heap_collect_ex(heap, (force ? FORCE : NORMAL)); + mi_heap_collect_ex(heap, (force ? 
MI_FORCE : MI_NORMAL)); } void mi_collect(bool force) mi_attr_noexcept { @@ -171,7 +173,7 @@ void mi_collect(bool force) mi_attr_noexcept { ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { - mi_thread_init(); + mi_thread_init(); return mi_get_default_heap(); } @@ -184,12 +186,6 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -uintptr_t _mi_heap_random(mi_heap_t* heap) { - uintptr_t r = heap->random; - heap->random = _mi_random_shuffle(r); - return r; -} - mi_heap_t* mi_heap_new(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); @@ -197,12 +193,18 @@ mi_heap_t* mi_heap_new(void) { memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; - heap->random = _mi_heap_random(bheap); + _mi_random_split(&bheap->random, &heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe return heap; } +uintptr_t _mi_heap_random_next(mi_heap_t* heap) { + return _mi_random_next(&heap->random); +} + // zero out the page queues static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); @@ -220,7 +222,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { static void mi_heap_free(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); if (mi_heap_is_backing(heap)) return; // dont free the backing heap - + // reset default if (mi_heap_is_default(heap)) { _mi_heap_set_default_direct(heap->tld->heap_backing); @@ -241,30 +243,35 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(pq); // ensure no more thread_delayed_free will be added - _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_LARGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.giant, bsize); } else { - _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); } } - #if (MI_STAT>1) - size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); +#if (MI_STAT>1) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse); } - mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... - #endif + mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... +#endif - // pretend it is all free now - mi_assert_internal(page->thread_freed<=0xFFFF); - page->used = (uint16_t)page->thread_freed; + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; // and free the page + // mi_page_free(page,false); + page->next = NULL; + page->prev = NULL; _mi_segment_page_free(page,false /* no force? 
*/, &heap->tld->segments); return true; // keep going @@ -303,7 +310,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { if (from==NULL || from->page_count == 0) return; // unfull all full pages in the `from` heap - mi_page_t* page = from->pages[MI_BIN_FULL].first; + mi_page_t* page = from->pages[MI_BIN_FULL].first; while (page != NULL) { mi_page_t* next = page->next; _mi_page_unfull(page); @@ -315,7 +322,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { _mi_heap_delayed_free(from); // transfer all pages by appending the queues; this will set - // a new heap field which is ok as all pages are unfull'd and thus + // a new heap field which is ok as all pages are unfull'd and thus // other threads won't access this field anymore (see `mi_free_block_mt`) for (size_t i = 0; i < MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; @@ -326,7 +333,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { } mi_assert_internal(from->thread_delayed_free == NULL); mi_assert_internal(from->page_count == 0); - + // and reset the `from` heap mi_heap_reset_pages(from); } @@ -354,7 +361,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (!mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -373,7 +380,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); if (mi_unlikely(!valid)) return NULL; - return _mi_segment_page_of(segment,p)->heap; + return mi_page_heap(_mi_segment_page_of(segment,p)); } bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { @@ -389,7 +396,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); - void* end = (uint8_t*)start + (page->capacity * page->block_size); + void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found } @@ -431,13 +438,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; + const size_t bsize = mi_page_block_size(page); size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(page->heap, area, pstart, page->block_size, arg); + return visitor(mi_page_heap(page), area, pstart, bsize, arg); } // create a bitmap of free blocks. @@ -450,8 +458,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v free_count++; mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; - mi_assert_internal(offset % page->block_size == 0); - size_t blockidx = offset / page->block_size; // Todo: avoid division? + mi_assert_internal(offset % bsize == 0); + size_t blockidx = offset / bsize; // Todo: avoid division? 
mi_assert_internal( blockidx < MI_MAX_BLOCKS); size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); @@ -470,8 +478,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v } else if ((m & ((uintptr_t)1 << bit)) == 0) { used_count++; - uint8_t* block = pstart + (i * page->block_size); - if (!visitor(page->heap, area, block, page->block_size, arg)) return false; + uint8_t* block = pstart + (i * bsize); + if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -486,12 +494,13 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; + const size_t bsize = mi_page_block_size(page); xarea.page = page; - xarea.area.reserved = page->reserved * page->block_size; - xarea.area.committed = page->capacity * page->block_size; + xarea.area.reserved = page->reserved * bsize; + xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used - page->thread_freed; // race is ok - xarea.area.block_size = page->block_size; + xarea.area.used = page->used; + xarea.area.block_size = bsize; return fun(heap, &xarea, arg); } @@ -524,4 +533,3 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_vis mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); } - diff --git a/src/init.c b/src/init.c index 81413aa9..f8411187 100644 --- a/src/init.c +++ b/src/init.c @@ -12,19 +12,22 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, 0, 0, - { 0 }, false, + 0, false, false, false, false, + 0, // capacity + 0, // reserved capacity + { 0 }, // flags + false, // is_zero + 0, // retire_expire NULL, // free #if MI_ENCODE_FREELIST - 0, + { 0, 0 }, #endif 0, // used - NULL, - ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), - 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) - , { NULL } // padding - #endif + 0, // xblock_size + NULL, // local_free + ATOMIC_VAR_INIT(0), // xthread_free + ATOMIC_VAR_INIT(0), // xheap + NULL, NULL }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) @@ -83,10 +86,11 @@ const mi_heap_t _mi_heap_empty = { MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, ATOMIC_VAR_INIT(NULL), - 0, - 0, - 0, - 0, + 0, // tid + 0, // cookie + { 0, 0 }, // keys + { {0}, {0}, 0 }, + 0, // page count false }; @@ -95,100 +99,48 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) +#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os))) static mi_tld_t tld_main = { 0, false, &_mi_heap_main, - { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, + 0, 0, 0, 0, 0, 0, NULL, + tld_main_stats, tld_main_os + }, // segments + { 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; +#if MI_INTPTR_SIZE==8 +#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL) +#else +#define MI_INIT_COOKIE 
(0xCDCDCDCDUL) +#endif + mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - NULL, - 0, // thread id -#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) - 0xCDCDCDCDCDCDCDCDUL, -#else - 0xCDCDCDCDUL, -#endif - 0, // random - 0, // page count - false // can reclaim + ATOMIC_VAR_INIT(NULL), + 0, // thread id + MI_INIT_COOKIE, // initial cookie + { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0}, {0}, 0 }, // random + 0, // page count + false // can reclaim }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; -/* ----------------------------------------------------------- - Initialization of random numbers ------------------------------------------------------------ */ - -#if defined(_WIN32) -#include <windows.h> -#elif defined(__APPLE__) -#include <mach/mach_time.h> -#else -#include <time.h> -#endif - -uintptr_t _mi_random_shuffle(uintptr_t x) { - #if (MI_INTPTR_SIZE==8) - // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c> - x ^= x >> 30; - x *= 0xbf58476d1ce4e5b9UL; - x ^= x >> 27; - x *= 0x94d049bb133111ebUL; - x ^= x >> 31; - #elif (MI_INTPTR_SIZE==4) - // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/> - x ^= x >> 16; - x *= 0x7feb352dUL; - x ^= x >> 15; - x *= 0x846ca68bUL; - x ^= x >> 16; - #endif - return x; -} - -uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) { -#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse - uintptr_t x; - arc4random_buf(&x, sizeof x); -#else - // Hopefully, ASLR makes our function address random - uintptr_t x = (uintptr_t)((void*)&_mi_random_init); - x ^= seed; - // xor with high res time -#if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); -#elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); -#else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; -#endif - // and do a few randomization steps - uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { - x = _mi_random_shuffle(x); - } -#endif - return x; -} /* ----------------------------------------------------------- Initialization and freeing of the thread local heaps ----------------------------------------------------------- */ +// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; } mi_thread_data_t; @@ -203,22 +155,25 @@ static bool _mi_heap_init(void) { mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); } else { - // use `_mi_os_alloc` to allocate directly from the OS + // use `_mi_os_alloc` to allocate directly from the OS mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t),&_mi_stats_main); // Todo: more efficient allocation?
if (td == NULL) { - _mi_error_message("failed to allocate thread local heap memory\n"); + _mi_error_message(ENOMEM, "failed to allocate thread local heap memory\n"); return false; } + // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; memcpy(heap, &_mi_heap_empty, sizeof(*heap)); heap->thread_id = _mi_thread_id(); - heap->random = _mi_random_init(heap->thread_id); - heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; - heap->tld = tld; - memset(tld, 0, sizeof(*tld)); + _mi_random_init(&heap->random); + heap->cookie = _mi_heap_random_next(heap) | 1; + heap->key[0] = _mi_heap_random_next(heap); + heap->key[1] = _mi_heap_random_next(heap); + heap->tld = tld; tld->heap_backing = heap; tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; tld->os.stats = &tld->stats; _mi_heap_set_default_direct(heap); } @@ -237,7 +192,7 @@ static bool _mi_heap_done(mi_heap_t* heap) { // switch to backing heap and free it heap = heap->tld->heap_backing; if (!mi_heap_is_initialized(heap)) return false; - + // collect if not the main thread if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); @@ -248,6 +203,7 @@ static bool _mi_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { + mi_assert_internal(heap->tld->segments.count == 0); _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); } #if (MI_DEBUG > 0) @@ -334,7 +290,7 @@ void mi_thread_init(void) mi_attr_noexcept mi_process_init(); // initialize the thread local default heap - // (this will call `_mi_heap_set_default_direct` and thus set the + // (this will call `_mi_heap_set_default_direct` and thus set the // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_heap_init()) return; // returns true if already initialized @@ -368,9 +324,9 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { #if defined(_WIN32) && defined(MI_SHARED_LIB) // nothing to do as it is done in DllMain #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsSetValue(mi_fls_key, heap); + FlsSetValue(mi_fls_key, heap); #elif defined(MI_USE_PTHREADS) - pthread_setspecific(mi_pthread_key, heap); + pthread_setspecific(mi_pthread_key, heap); #endif } @@ -394,7 +350,7 @@ bool mi_is_redirected() mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) #ifdef __cplusplus extern "C" { #endif @@ -438,13 +394,7 @@ static void mi_process_load(void) { const char* msg = NULL; mi_allocator_init(&msg); if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { - _mi_fputs(NULL,NULL,msg); - } - - if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB) - mi_reserve_huge_os_pages(pages, max_secs, NULL); + _mi_fputs(NULL,NULL,NULL,msg); } } @@ -455,16 +405,17 @@ void mi_process_init(void) mi_attr_noexcept { // access _mi_heap_default before setting _mi_process_is_initialized to ensure // that the TLS slot is allocated without getting into recursion on macOS // when using dynamic linking with interpose. 
- mi_heap_t* h = mi_get_default_heap(); + mi_get_default_heap(); _mi_process_is_initialized = true; _mi_heap_main.thread_id = _mi_thread_id(); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); - uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; - #ifndef __APPLE__ - _mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; + _mi_random_init(&_mi_heap_main.random); + #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened.. + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main); + _mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main); #endif - _mi_heap_main.random = _mi_random_shuffle(random); mi_process_setup_auto_thread_done(); _mi_os_init(); #if (MI_DEBUG) @@ -473,6 +424,11 @@ void mi_process_init(void) mi_attr_noexcept { _mi_verbose_message("secure level: %d\n", MI_SECURE); mi_thread_init(); mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + + if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + } } // Called when the process is done (through `at_exit`) @@ -499,7 +455,7 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) - // Windows DLL: easy to hook into process_init and thread_done + // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { UNUSED(reserved); UNUSED(inst); diff --git a/src/memory.c b/src/memory.c deleted file mode 100644 index 39f05ffb..00000000 --- a/src/memory.c +++ /dev/null @@ -1,546 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses in that - object :-( (`malloc-large` tests this). This means we need a cheaper way to - reuse memory. -3. This layer can help with a NUMA aware allocation in the future. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). 
-- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include // memset - -// Internal raw OS interface -size_t _mi_os_large_page_size(); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); -bool _mi_os_is_huge_reserved(void* p); - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - -#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) -#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) -#define MI_REGION_MAP_FULL UINTPTR_MAX - - -typedef uintptr_t mi_region_info_t; - -static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { - return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); -} - -static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { - if (is_large) *is_large = ((info&0x02) != 0); - if (is_committed) *is_committed = ((info&0x01) != 0); - return (void*)(info & ~0x03); -} - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block - volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags - volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd -} mem_region_t; - - -// The region map; 16KiB for a 256GiB HEAP_REGION_MAX -// TODO: in the future, maintain a map per NUMA node for numa aware allocation -static mem_region_t regions[MI_REGION_MAX]; - -static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. 
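// (Editor's note, illustrative only: with 4MiB segments, e.g.
//  mi_region_block_count(5*MiB) == 2, and mi_region_block_mask(2,6) == 0xC0
//  marks blocks 6 and 7 of a region; a maximal MI_REGION_MAX_ALLOC_SIZE claim
//  covers 16 of the 64 map bits of a region on a 64-bit build.)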
-static size_t mi_region_block_count(size_t size) { - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); - return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE; -} - -// The bit mask for a given number of blocks at a specified bit index. -static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) { - mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS); - return ((((uintptr_t)1 << blocks) - 1) << bitidx); -} - -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} - -// Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_read_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(®ions[i].info), NULL, NULL); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -/* ---------------------------------------------------------------------------- -Commit from a region ------------------------------------------------------------------------------*/ - -// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks, - size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - size_t mask = mi_region_block_mask(blocks,bitidx); - mi_assert_internal(mask != 0); - mi_assert_internal((mask & mi_atomic_read_relaxed(®ion->map)) == mask); - mi_assert_internal(®ions[idx] == region); - - // ensure the region is reserved - mi_region_info_t info = mi_atomic_read(®ion->info); - if (info == 0) - { - bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); - bool region_large = *allow_large; - void* start = NULL; - if (region_large) { - start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); - if (start != NULL) { region_commit = true; } - } - if (start == NULL) { - start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, ®ion_large, tld); - } - mi_assert_internal(!(region_large && !*allow_large)); - - if (start == NULL) { - // failure to allocate from the OS! unclaim the blocks and fail - size_t map; - do { - map = mi_atomic_read_relaxed(®ion->map); - } while (!mi_atomic_cas_weak(®ion->map, map & ~mask, map)); - return false; - } - - // set the newly allocated region - info = mi_region_info_create(start,region_large,region_commit); - if (mi_atomic_cas_strong(®ion->info, info, 0)) { - // update the region count - mi_atomic_increment(®ions_count); - } - else { - // failed, another thread allocated just before us! - // we assign it to a later slot instead (up to 4 tries). 
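// (Editor's note: that is, the freshly reserved MI_REGION_SIZE area is parked
//  in one of the next few region slots, or unmapped again below if all 4 tries
//  fail; either way the allocation continues with the `info` that won the race.)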
- for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { - if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) { - mi_atomic_increment(&regions_count); - start = NULL; - break; - } - } - if (start != NULL) { - // free it if we didn't succeed to save it to some other region - _mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats); - } - // and continue with the memory at our index - info = mi_atomic_read(&region->info); - } - } - mi_assert_internal(info == mi_atomic_read(&region->info)); - mi_assert_internal(info != 0); - - // Commit the blocks to memory - bool region_is_committed = false; - bool region_is_large = false; - void* start = mi_region_info_read(info,&region_is_large,&region_is_committed); - mi_assert_internal(!(region_is_large && !*allow_large)); - mi_assert_internal(start!=NULL); - - // set dirty bits - uintptr_t m; - do { - m = mi_atomic_read(&region->dirty_mask); - } while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m)); - *is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range? - - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - if (*commit && !region_is_committed) { - // ensure commit - bool commit_zero = false; - _mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) - if (commit_zero) *is_zero = true; - } - else if (!*commit && region_is_committed) { - // but even when no commit is requested, we might have committed anyway (in a huge OS page for example) - *commit = true; - } - - // and return the allocation - mi_assert_internal(blocks_start != NULL); - *allow_large = region_is_large; - *p = blocks_start; - *id = (idx*MI_REGION_MAP_BITS) + bitidx; - return true; -} - -// Use bit scan forward to quickly find the first zero bit if it is available -#if defined(_MSC_VER) -#define MI_HAVE_BITSCAN -#include <intrin.h> -static inline size_t mi_bsf(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanForward64(&idx, x); - #else - _BitScanForward(&idx, x); - #endif - return idx; -} -static inline size_t mi_bsr(uintptr_t x) { - if (x==0) return 8*MI_INTPTR_SIZE; - DWORD idx; - #if (MI_INTPTR_SIZE==8) - _BitScanReverse64(&idx, x); - #else - _BitScanReverse(&idx, x); - #endif - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -#define MI_HAVE_BITSCAN -static inline size_t mi_bsf(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x)); -} -static inline size_t mi_bsr(uintptr_t x) { - return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x)); -} -#endif - -// Allocate `blocks` in a `region` at `idx` of a given `size`. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
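// (Editor's note summarizing the contract of this helper and the ones above:
//    returns false            -> hard failure (OOM while committing); stop searching;
//    returns true, *p == NULL -> no free range in this region; try the next one;
//    returns true, *p != NULL -> success, with *id encoding the claimed range.
//  This is why `_mi_mem_alloc_aligned` below checks `p != NULL` after each call.)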
-static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(p != NULL && id != NULL); - mi_assert_internal(blocks < MI_REGION_MAP_BITS); - - const uintptr_t mask = mi_region_block_mask(blocks, 0); - const size_t bitidx_max = MI_REGION_MAP_BITS - blocks; - uintptr_t map = mi_atomic_read(®ion->map); - if (map==MI_REGION_MAP_FULL) return true; - - #ifdef MI_HAVE_BITSCAN - size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible - #else - size_t bitidx = 0; // otherwise start at 0 - #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx - - // scan linearly for a free range of zero bits - while(bitidx <= bitidx_max) { - if ((map & m) == 0) { // are the mask bits free at bitidx? - mi_assert_internal((m >> bitidx) == mask); // no overflow? - uintptr_t newmap = map | m; - mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak(®ion->map, newmap, map)) { // TODO: use strong cas here? - // no success, another thread claimed concurrently.. keep going - map = mi_atomic_read(®ion->map); - continue; - } - else { - // success, we claimed the bits - // now commit the block memory -- this can still fail - return mi_region_commit_blocks(region, idx, bitidx, blocks, - size, commit, allow_large, is_zero, p, id, tld); - } - } - else { - // on to the next bit range - #ifdef MI_HAVE_BITSCAN - size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1); - mi_assert_internal(shift > 0 && shift <= blocks); - #else - size_t shift = 1; - #endif - bitidx += shift; - m <<= shift; - } - } - // no error, but also no bits found - return true; -} - -// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim. -// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written -// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call. -// (not being able to claim is not considered an error so check for `p != NULL` afterwards). -static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size, - bool* commit, bool* allow_large, bool* is_zero, - void** p, size_t* id, mi_os_tld_t* tld) -{ - // check if there are available blocks in the region.. - mi_assert_internal(idx < MI_REGION_MAX); - mem_region_t* region = ®ions[idx]; - uintptr_t m = mi_atomic_read_relaxed(®ion->map); - if (m != MI_REGION_MAP_FULL) { // some bits are zero - bool ok = (*commit || *allow_large); // committing or allow-large is always ok - if (!ok) { - // otherwise skip incompatible regions if possible. - // this is not guaranteed due to multiple threads allocating at the same time but - // that's ok. In secure mode, large is never allowed for any thread, so that works out; - // otherwise we might just not be able to reset/decommit individual pages sometimes. - mi_region_info_t info = mi_atomic_read_relaxed(®ion->info); - bool is_large; - bool is_committed; - void* start = mi_region_info_read(info,&is_large,&is_committed); - ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation? 
- } - if (ok) { - return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld); - } - } - return true; // no error, but no success either -} - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, - size_t* id, mi_os_tld_t* tld) -{ - mi_assert_internal(id != NULL && tld != NULL); - mi_assert_internal(size > 0); - *id = SIZE_MAX; - *is_zero = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - - // use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`) - if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) { - *is_zero = true; - return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size - } - - // always round size to OS page size multiple (so commit/decommit go over the entire range) - // TODO: use large OS page size here? - size = _mi_align_up(size, _mi_os_page_size()); - - // calculate the number of needed blocks - size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); - - // find a range of free blocks - void* p = NULL; - size_t count = mi_atomic_read(&regions_count); - size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - - if (p == NULL) { - // no free range in existing regions -- try to extend beyond the count.. but at most 8 regions - for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) { - if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error - if (p != NULL) break; - } - } - - if (p == NULL) { - // we could not find a place to allocate, fall back to the os directly - _mi_warning_message("unable to allocate from region: size %zu\n", size); - *is_zero = true; - p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld); - } - else { - tld->region_idx = idx; // next start of search? currently not used as we use first-fit - } - - mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id.
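// (Editor's note: `id` was produced as `idx*MI_REGION_MAP_BITS + bitidx`, so the
//  function below decodes it as, e.g. for id == 70 on a 64-bit build:
//    size_t idx    = id / MI_REGION_MAP_BITS;  // 70 / 64 == 1 (second region)
//    size_t bitidx = id % MI_REGION_MAP_BITS;  // 70 % 64 == 6 (seventh block) )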
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { - mi_assert_internal(size > 0 && stats != NULL); - if (p==NULL) return; - if (size==0) return; - if (id == SIZE_MAX) { - // was a direct OS allocation, pass through - _mi_os_free(p, size, stats); - } - else { - // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; - // we can align the size up to page size (as we allocate that way too) - // this ensures we fully commit/decommit/reset - size = _mi_align_up(size, _mi_os_page_size()); - size_t idx = (id / MI_REGION_MAP_BITS); - size_t bitidx = (id % MI_REGION_MAP_BITS); - size_t blocks = mi_region_block_count(size); - size_t mask = mi_region_block_mask(blocks, bitidx); - mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`? - mem_region_t* region = ®ions[idx]; - mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed? - mi_region_info_t info = mi_atomic_read(®ion->info); - bool is_large; - bool is_eager_committed; - void* start = mi_region_info_read(info,&is_large,&is_eager_committed); - mi_assert_internal(start != NULL); - void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); - if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? - - // decommit (or reset) the blocks to reduce the working set. - // TODO: implement delayed decommit/reset as these calls are too expensive - // if the memory is reused soon. - // reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large - if (!is_large) { - if (mi_option_is_enabled(mi_option_segment_reset)) { - if (!is_eager_committed && // cannot reset large pages - (mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead - mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments - { - _mi_os_reset(p, size, stats); - //_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? - } - } - } - if (!is_eager_committed) { - // adjust commit statistics as we commit again when re-using the same slot - _mi_stat_decrease(&stats->committed, mi_good_commit_size(size)); - } - - // TODO: should we free empty regions? currently only done _mi_mem_collect. - // this frees up virtual address space which might be useful on 32-bit systems? - - // and unclaim - uintptr_t map; - uintptr_t newmap; - do { - map = mi_atomic_read_relaxed(®ion->map); - newmap = map & ~mask; - } while (!mi_atomic_cas_weak(®ion->map, newmap, map)); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_stats_t* stats) { - // free every region that has no segments in use. 
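// (Editor's note: the CAS from 0 to ~0 below temporarily marks every block of an
//  empty region as claimed, so that a racing allocation cannot grab blocks while
//  the region's memory is returned to the OS.)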
- for (size_t i = 0; i < regions_count; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_read_relaxed(®ion->map) == 0) { - // if no segments used, try to claim the whole region - uintptr_t m; - do { - m = mi_atomic_read_relaxed(®ion->map); - } while(m == 0 && !mi_atomic_cas_weak(®ion->map, ~((uintptr_t)0), 0 )); - if (m == 0) { - // on success, free the whole region (unless it was huge reserved) - bool is_eager_committed; - void* start = mi_region_info_read(mi_atomic_read(®ion->info), NULL, &is_eager_committed); - if (start != NULL && !_mi_os_is_huge_reserved(start)) { - _mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); - } - // and release - mi_atomic_write(®ion->info,0); - mi_atomic_write(®ion->map,0); - } - } - } -} - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_commit(p, size, is_zero, stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_decommit(p, size, stats); -} - -bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { - return _mi_os_reset(p, size, stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { - return _mi_os_unreset(p, size, is_zero, stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/src/options.c b/src/options.c index 0bee74e0..af051aa2 100644 --- a/src/options.c +++ b/src/options.c @@ -28,7 +28,7 @@ int mi_version(void) mi_attr_noexcept { // -------------------------------------------------------- // Options -// These can be accessed by multiple threads and may be +// These can be accessed by multiple threads and may be // concurrently initialized, but an initializing data race // is ok since they resolve to the same value. // -------------------------------------------------------- @@ -56,22 +56,25 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled - #ifdef _WIN32 // and BSD? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand + #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? 
+ { 0, UNINIT, MI_OPTION(eager_region_commit) }, + { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory #else { 1, UNINIT, MI_OPTION(eager_region_commit) }, + { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED #endif { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, - { 0, UNINIT, MI_OPTION(cache_reset) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output }; static void mi_option_init(mi_option_desc_t* desc); @@ -96,7 +99,7 @@ long mi_option_get(mi_option_t option) { mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { - mi_option_init(desc); + mi_option_init(desc); } return desc->value; } @@ -138,9 +141,10 @@ void mi_option_disable(mi_option_t option) { } -static void mi_out_stderr(const char* msg) { +static void mi_out_stderr(const char* msg, void* arg) { + UNUSED(arg); #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output + // on windows with redirection, the C runtime cannot handle locale dependent output // after the main thread closes so we use direct console output. if (!_mi_preloading()) { _cputs(msg); } #else @@ -158,13 +162,14 @@ static void mi_out_stderr(const char* msg) { static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(uintptr_t) out_len; -static void mi_out_buf(const char* msg) { +static void mi_out_buf(const char* msg, void* arg) { + UNUSED(arg); if (msg==NULL) return; if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; size_t n = strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_addu(&out_len, n); + uintptr_t start = mi_atomic_add(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -173,25 +178,25 @@ static void mi_out_buf(const char* msg) { memcpy(&out_buf[start], msg, n); } -static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { +static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { if (out==NULL) return; // claim (if `no_more_buf == true`, no more output will be added after this point) - size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); + size_t count = mi_atomic_add(&out_len, (no_more_buf ? 
MI_MAX_DELAY_OUTPUT : 1)); // and output the current contents if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; out_buf[count] = 0; - out(out_buf); + out(out_buf,arg); if (!no_more_buf) { - out_buf[count] = '\n'; // if continue with the buffer, insert a newline + out_buf[count] = '\n'; // if continue with the buffer, insert a newline } } // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg) { - mi_out_stderr(msg); - mi_out_buf(msg); +static void mi_out_buf_stderr(const char* msg, void* arg) { + mi_out_stderr(msg,arg); + mi_out_buf(msg,arg); } @@ -204,60 +209,65 @@ static void mi_out_buf_stderr(const char* msg) { // For now, don't register output from multiple threads. #pragma warning(suppress:4180) static mi_output_fun* volatile mi_out_default; // = NULL +static volatile _Atomic(void*) mi_out_arg; // = NULL -static mi_output_fun* mi_out_get_default(void) { +static mi_output_fun* mi_out_get_default(void** parg) { + if (parg != NULL) { *parg = mi_atomic_read_ptr(void,&mi_out_arg); } mi_output_fun* out = mi_out_default; return (out == NULL ? &mi_out_buf : out); } -void mi_register_output(mi_output_fun* out) mi_attr_noexcept { +void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer - if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now + mi_atomic_write_ptr(void,&mi_out_arg, arg); + if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now } // add stderr to the delayed output after the module is loaded static void mi_add_stderr_output() { - mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr - mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output + mi_assert_internal(mi_out_default == NULL); + mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr + mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output } // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -#define MAX_ERROR_COUNT (10) static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation // inside the C runtime causes another message. static mi_decl_thread bool recurse = false; -void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { if (recurse) return; - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); + if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + out = mi_out_get_default(&arg); + } recurse = true; - if (prefix != NULL) out(prefix); - out(message); + if (prefix != NULL) out(prefix,arg); + out(message,arg); recurse = false; return; } // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. 
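// (Editor's sketch, not part of the patch: the `void* arg` now threaded through
//  mi_vfprintf/_mi_fputs lets a callback registered with the new two-argument
//  mi_register_output carry its own state, e.g.:
//
//    static void my_output(const char* msg, void* arg) {
//      fputs(msg, (FILE*)arg);            // arg is the FILE* given at registration
//    }
//    ...
//    mi_register_output(&my_output, stderr);
//
//  Passing NULL for `out` restores the default stderr output.)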
-static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { +static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; if (recurse) return; recurse = true; vsnprintf(buf,sizeof(buf)-1,fmt,args); recurse = false; - _mi_fputs(out,prefix,buf); + _mi_fputs(out,arg,prefix,buf); } -void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { +void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_list args; va_start(args,fmt); - mi_vfprintf(out,NULL,fmt,args); + mi_vfprintf(out,arg,NULL,fmt,args); va_end(args); } @@ -265,7 +275,7 @@ void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -273,18 +283,14 @@ void _mi_verbose_message(const char* fmt, ...) { if (!mi_option_is_enabled(mi_option_verbose)) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } -void _mi_error_message(const char* fmt, ...) { +static void mi_show_error_message(const char* fmt, va_list args) { if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; if (mi_atomic_increment(&error_count) > mi_max_error_count) return; - va_list args; - va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); - va_end(args); - mi_assert(false); + mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { @@ -292,26 +298,52 @@ void _mi_warning_message(const char* fmt, ...) { if (mi_atomic_increment(&error_count) > mi_max_error_count) return; va_list args; va_start(args,fmt); - mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); + mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } #if MI_DEBUG void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { - _mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); + _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } #endif -mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) { +// -------------------------------------------------------- +// Errors +// -------------------------------------------------------- + +static mi_error_fun* volatile mi_error_handler; // = NULL +static volatile _Atomic(void*) mi_error_arg; // = NULL + +static void mi_error_default(int err) { + UNUSED(err); +#if (MI_SECURE>0) + if (err==EFAULT) { // abort on serious errors in secure mode (corrupted meta-data) + abort(); + } +#endif +} + +void mi_register_error(mi_error_fun* fun, void* arg) { + mi_error_handler = fun; // can be NULL + mi_atomic_write_ptr(void,&mi_error_arg, arg); +} + +void _mi_error_message(int err, const char* fmt, ...) 
{
+  // show detailed error message
   va_list args;
   va_start(args, fmt);
-  mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args);
+  mi_show_error_message(fmt, args);
   va_end(args);
-  #if (MI_SECURE>=0)
-  abort();
-  #endif
+  // and call the error handler which may abort (or return normally)
+  if (mi_error_handler != NULL) {
+    mi_error_handler(err, mi_atomic_read_ptr(void,&mi_error_arg));
+  }
+  else {
+    mi_error_default(err);
+  }
 }

 // --------------------------------------------------------
@@ -339,7 +371,7 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) {
 #include <windows.h>
 static bool mi_getenv(const char* name, char* result, size_t result_size) {
   result[0] = 0;
-  size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
+  size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
   return (len > 0 && len < result_size);
 }
 #else
@@ -365,7 +397,11 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
     }
   }
 #endif
-static void mi_option_init(mi_option_desc_t* desc) {
+static void mi_option_init(mi_option_desc_t* desc) {
+  #ifndef _WIN32
+  // cannot call getenv() when still initializing the C runtime.
+  if (_mi_preloading()) return;
+  #endif
   // Read option value from the environment
   char buf[64+1];
   mi_strlcpy(buf, "mimalloc_", sizeof(buf));
diff --git a/src/os.c b/src/os.c
index d5d734fe..b8dfaa70 100644
--- a/src/os.c
+++ b/src/os.c
@@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc-atomic.h"

 #include <string.h>  // strerror
-#include <errno.h>
+

 #if defined(_WIN32)
 #include <windows.h>
@@ -36,8 +36,6 @@ terms of the MIT license. A copy of the license can be found in the file
   large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
 ----------------------------------------------------------- */
 bool  _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
-bool  _mi_os_is_huge_reserved(void* p);
-void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);

 static void* mi_align_up_ptr(void* p, size_t alignment) {
   return (void*)_mi_align_up((uintptr_t)p, alignment);
@@ -150,7 +148,7 @@ void _mi_os_init(void) {
     FreeLibrary(hDll);
   }
   hDll = LoadLibrary(TEXT("ntdll.dll"));
-  if (hDll != NULL) {
+  if (hDll != NULL) {
     pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
     FreeLibrary(hDll);
   }
@@ -171,9 +169,7 @@ void _mi_os_init() {
     os_page_size = (size_t)result;
     os_alloc_granularity = os_page_size;
   }
-  if (mi_option_is_enabled(mi_option_large_os_pages)) {
-    large_os_page_size = (1UL << 21); // 2MiB
-  }
+  large_os_page_size = 2*MiB; // TODO: can we query the OS for this?
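  // [editor's note] on the TODO above: this can in fact be queried -- Linux
  // reports the default large-page size on the "Hugepagesize:" line of
  // /proc/meminfo, and Windows exposes GetLargePageMinimum(); 2*MiB is
  // simply the common x86-64 default.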
} #endif @@ -184,7 +180,7 @@ void _mi_os_init() { static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) { - if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) bool err = false; #if defined(_WIN32) err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); @@ -209,31 +205,6 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #ifdef _WIN32 static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */ - && (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0 - && (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0) - && pNtAllocateVirtualMemoryEx != NULL) - { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif - MEM_EXTENDED_PARAMETER param = { 0, 0 }; - param.Type = 5; // == MemExtendedParameterAttributeFlags; - param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, ¶m, 1); - if (err == 0) { - return base; - } - else { - // else fall back to regular large OS pages - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err); - } - } -#endif #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations void* hint; @@ -327,7 +298,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro #if !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS; + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; int fd = -1; #if defined(MAP_ALIGNED) // BSD if (try_alignment > 0) { @@ -357,7 +331,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); } else { - int lflags = flags; + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux int lfd = fd; #ifdef MAP_ALIGNED_SUPER lflags |= MAP_ALIGNED_SUPER; @@ -366,7 +340,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro lflags |= MAP_HUGETLB; #endif #ifdef MAP_HUGE_1GB - if ((size % ((uintptr_t)1 << 30)) == 0) { + static bool mi_huge_pages_available = true; + if ((size % GiB) == 0 && mi_huge_pages_available) { lflags |= MAP_HUGE_1GB; } else @@ -385,6 +360,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); @@ -402,10 +378,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro p = 
mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
     #if defined(MADV_HUGEPAGE)
     // Many Linux systems don't allow MAP_HUGETLB but they support instead
-    // transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE
+    // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
     // though since properly aligned allocations will already use large pages if available
     // in that case -- in particular for our large regions (in `memory.c`).
-    // However, some systems only allow TPH if called with explicit `madvise`, so
+    // However, some systems only allow THP if called with explicit `madvise`, so
     // when large OS pages are enabled for mimalloc, we call `madvise` anyway.
     if (allow_large && use_large_os_page(size, try_alignment)) {
       if (madvise(p, size, MADV_HUGEPAGE) == 0) {
@@ -421,20 +397,20 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(intptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;

 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
   if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL;
   if ((size%MI_SEGMENT_SIZE) != 0) return NULL;
-  intptr_t hint = mi_atomic_add(&aligned_base, size);
+  uintptr_t hint = mi_atomic_add(&aligned_base, size);
   if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages)
-    intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area
+    uintptr_t init = ((uintptr_t)4 << 40); // start at 4TiB area
 #if (MI_SECURE>0 || MI_DEBUG==0)      // security: randomize start of aligned allocations unless in debug mode
-    uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint);
-    init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF));  // (randomly 0-64k)*4MiB == 0 to 256GiB
+    uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
+    init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB
 #endif
-    mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size);
+    mi_atomic_cas_strong(&aligned_base, init, hint + size);
     hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
   }
   if (hint%try_alignment != 0) return NULL;
@@ -620,15 +596,27 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
   return mi_os_page_align_areax(true, addr, size, newsize);
 }

+static void mi_mprotect_hint(int err) {
+#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page
+  if (err == ENOMEM) {
+    _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n"
+                        "  On Linux this is controlled by the vm.max_map_count. For example:\n"
+                        "  > sudo sysctl -w vm.max_map_count=262144\n");
+  }
+#else
+  UNUSED(err);
+#endif
+}

 // Commit/Decommit memory.
 // Usually commit is aligned liberally, while decommit is aligned conservatively.
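// [editor's note] "liberally" here means the range is rounded outward to
// whole OS pages so the entire request is covered, while "conservatively"
// means it is rounded inward so nothing outside the requested range is
// ever touched.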
// (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { // page align in the range, commit liberally, decommit conservative - *is_zero = false; + if (is_zero != NULL) { *is_zero = false; } size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) int err = 0; if (commit) { _mi_stat_increase(&stats->committed, csize); @@ -651,31 +639,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ } #elif defined(__wasi__) // WebAssembly guests can't control memory protection + #elif defined(MAP_FIXED) + if (!commit) { + // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); + if (p != start) { err = errno; } + } + else { + // for commit, just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } #else err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); if (err != 0) { err = errno; } #endif if (err != 0) { - _mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); + _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); + mi_mprotect_hint(err); } mi_assert_internal(err == 0); return (err == 0); } bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); + return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); } bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); + return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); } - // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. @@ -684,7 +683,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); - if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; + if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) if (reset) _mi_stat_increase(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! 
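As background for the reset paths these hunks touch: a reset tells the OS it may reclaim the physical pages while the virtual range stays committed and reusable. A minimal sketch of that pattern, assuming a POSIX system with `madvise`; the name `example_reset` is illustrative and not part of this patch:

#include <stddef.h>
#include <sys/mman.h>

// illustrative only: hint that [start, start+size) may be reclaimed lazily,
// falling back to the eager MADV_DONTNEED where MADV_FREE is unavailable
static int example_reset(void* start, size_t size) {
#if defined(MADV_FREE)
  if (madvise(start, size, MADV_FREE) == 0) return 0;  // Linux 4.5+, BSD, macOS
#endif
  return madvise(start, size, MADV_DONTNEED);
}

The `reset_decommits` option above chooses the stronger alternative of decommitting the range entirely instead of merely advising the OS.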
@@ -720,7 +719,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
   int err = madvise(start, csize, MADV_DONTNEED);
 #endif
   if (err != 0) {
-    _mi_warning_message("madvise reset error: start: 0x%p, csize: 0x%x, errno: %i\n", start, csize, errno);
+    _mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno);
   }
   //mi_assert(err == 0);
   if (err != 0) return false;
@@ -734,7 +733,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
 // We page align to a conservative area inside the range to reset.
 bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
   if (mi_option_is_enabled(mi_option_reset_decommits)) {
-    return _mi_os_decommit(addr,size,stats);
+    return _mi_os_decommit(addr, size, stats);
   }
   else {
     return mi_os_resetx(addr, size, true, stats);
@@ -758,9 +757,11 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   size_t csize = 0;
   void* start = mi_os_page_align_area_conservative(addr, size, &csize);
   if (csize == 0) return false;
+  /*
   if (_mi_os_is_huge_reserved(addr)) {
    _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
   }
+  */
   int err = 0;
 #ifdef _WIN32
   DWORD oldprotect = 0;
@@ -773,7 +774,8 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
   if (err != 0) { err = errno; }
 #endif
   if (err != 0) {
-    _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err);
+    _mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err);
+    mi_mprotect_hint(err);
   }
   return (err == 0);
 }
@@ -810,140 +812,267 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {

 /* ----------------------------------------------------------------------------
-Support for huge OS pages (1Gib) that are reserved up-front and never
-released. Only regions are allocated in here (see `memory.c`) so the memory
-will be reused.
+Support for allocating huge OS pages (1GiB) that are reserved up-front
+and possibly associated with a specific NUMA node. (use `numa_node>=0`)
-----------------------------------------------------------------------------*/
-#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30)  // 1GiB
+#define MI_HUGE_OS_PAGE_SIZE  (GiB)

-typedef struct mi_huge_info_s {
-  volatile _Atomic(void*)  start;     // start of huge page area (32TiB)
-  volatile _Atomic(size_t) reserved;  // total reserved size
-  volatile _Atomic(size_t) used;      // currently allocated
-} mi_huge_info_t;
-
-static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) };
-
-bool _mi_os_is_huge_reserved(void* p) {
-  return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL &&
-          p >= mi_atomic_read_ptr(&os_huge_reserved.start) &&
-          (uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved));
-}
-
-void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment)
+#if defined(WIN32) && (MI_INTPTR_SIZE >= 8)
+static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 {
-  // only allow large aligned allocations (e.g. regions)
-  if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL;
-  if (try_alignment > MI_SEGMENT_SIZE) return NULL;
-  if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL;
-  if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full
+  mi_assert_internal(size%GiB == 0);
+  mi_assert_internal(addr != NULL);
+  const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;

-  // always aligned
-  mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0 );
-  mi_assert_internal( (uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0 );
+  mi_win_enable_large_os_pages();

-  // try to reserve space
-  size_t base = mi_atomic_addu( &os_huge_reserved.used, size );
-  if ((base + size) > os_huge_reserved.reserved) {
-    // "free" our over-allocation
-    mi_atomic_subu( &os_huge_reserved.used, size);
-    return NULL;
-  }
-
-  // success!
-  uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base;
-  mi_assert_internal( (uintptr_t)p % MI_SEGMENT_SIZE == 0 );
-  return p;
-}
-
-/*
-static void mi_os_free_huge_reserved() {
-  uint8_t* addr = os_huge_reserved.start;
-  size_t total  = os_huge_reserved.reserved;
-  os_huge_reserved.reserved = 0;
-  os_huge_reserved.start = NULL;
-  for( size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) {
-    _mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
-  }
-}
-*/
-
-#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
-int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
-  UNUSED(pages); UNUSED(max_secs);
-  if (pages_reserved != NULL) *pages_reserved = 0;
-  return ENOMEM;
-}
-#else
-int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept
-{
-  if (pages_reserved != NULL) *pages_reserved = 0;
-  if (max_secs==0) return ETIMEDOUT; // timeout
-  if (pages==0) return 0;            // ok
-  if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved
-
-  // Set the start address after the 32TiB area
-  uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address
-  #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
-  uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages);
-  start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
-  #endif
-
-  // Allocate one page at the time but try to place them contiguously
-  // We allocate one page at the time to be able to abort if it takes too long
-  double start_t = _mi_clock_start();
-  uint8_t* addr = start;  // current top of the allocations
-  for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
-    // allocate a page
-    void* p = NULL;
-    bool is_large = true;
-    #ifdef _WIN32
-    if (page==0) { mi_win_enable_large_os_pages(); }
-    p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large);
-    #elif defined(MI_OS_USE_MMAP)
-    p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large);
-    #else
-    // always fail
+  #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
+  MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} };
+  // on modern Windows try to use NtAllocateVirtualMemoryEx for 1GiB huge pages
+  static bool mi_huge_pages_available = true;
+  if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
+    #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
+    #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE  (0x10)
+    #endif
-
-    // Did we succeed at a contiguous address?
-    if (p != addr) {
-      // no success, issue a warning and return with an error
-      if (p != NULL) {
-        _mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr);
-        _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main );
-      }
-      else {
-        #ifdef _WIN32
-        int err = GetLastError();
-        #else
-        int err = errno;
-        #endif
-        _mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err);
-      }
-      return ENOMEM;
+    params[0].Type = 5; // == MemExtendedParameterAttributeFlags;
+    params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
+    ULONG param_count = 1;
+    if (numa_node >= 0) {
+      param_count++;
+      params[1].Type = MemExtendedParameterNumaNode;
+      params[1].ULong = (unsigned)numa_node;
     }
-    // success, record it
-    if (page==0) {
-      mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes
-      mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE);
+    SIZE_T psize = size;
+    void* base = addr;
+    NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
+    if (err == 0 && base != NULL) {
+      return base;
     }
     else {
-      mi_atomic_addu(&os_huge_reserved.reserved,MI_HUGE_OS_PAGE_SIZE);
-    }
-    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
-    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
-    if (pages_reserved != NULL) { *pages_reserved = page + 1; }
-
-    // check for timeout
-    double elapsed = _mi_clock_end(start_t);
-    if (elapsed > max_secs) return ETIMEDOUT;
-    if (page >= 1) {
-      double estimate = ((elapsed / (double)(page+1)) * (double)pages);
-      if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout
+      // fall back to regular large pages
+      mi_huge_pages_available = false; // don't try further huge pages
+      _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
     }
   }
-  _mi_verbose_message("reserved %zu huge pages\n", pages);
+  // on modern Windows try to use VirtualAlloc2 for NUMA-aware large OS page allocation
+  if (pVirtualAlloc2 != NULL && numa_node >= 0) {
+    params[0].Type = MemExtendedParameterNumaNode;
+    params[0].ULong = (unsigned)numa_node;
+    return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
+  }
+  #endif
+  // otherwise use regular virtual alloc on older Windows
+  return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
+}
+
+#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
+#include <sys/syscall.h>
+#ifndef MPOL_PREFERRED
+#define MPOL_PREFERRED 1
+#endif
+#if defined(SYS_mbind)
+static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
+  return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags);
+}
+#else
+static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
+  UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags);
+  return 0;
+}
+#endif
+static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
+  mi_assert_internal(size%GiB == 0);
+  bool is_large = true;
+  void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
+  if (p == NULL) return NULL;
+  if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
+    uintptr_t numa_mask = (1UL << numa_node);
+    // TODO: does `mbind` work correctly for huge OS pages? should we
+    // use `set_mempolicy` before calling mmap instead?
+    // see:
+    long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
+    if (err != 0) {
+      _mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno));
+    }
+  }
+  return p;
+}
+#else
+static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
+  return NULL;
+}
+#endif
+
+#if (MI_INTPTR_SIZE >= 8)
+// To ensure proper alignment, use our own area for huge OS pages
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
+
+// Claim an aligned address range for huge pages
+static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
+  if (total_size != NULL) *total_size = 0;
+  const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
+
+  uintptr_t start = 0;
+  uintptr_t end = 0;
+  uintptr_t expected;
+  do {
+    start = expected = mi_atomic_read_relaxed(&mi_huge_start);
+    if (start == 0) {
+      // Initialize the start address after the 32TiB area
+      start = ((uintptr_t)32 << 40);  // 32TiB virtual start address
+#if (MI_SECURE>0 || MI_DEBUG==0)      // security: randomize start of huge pages unless in debug mode
+      uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
+      start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF));  // (randomly 12 bits)*1GiB == between 0 and 4TiB
+#endif
+    }
+    end = start + size;
+    mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
+  } while (!mi_atomic_cas_strong(&mi_huge_start, end, expected));
+
+  if (total_size != NULL) *total_size = size;
+  return (uint8_t*)start;
+}
+#else
+static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
+  if (total_size != NULL) *total_size = 0;
+  return NULL;
+}
+#endif
+
+// Allocate MI_SEGMENT_SIZE aligned huge pages
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) {
+  if (psize != NULL) *psize = 0;
+  if (pages_reserved != NULL) *pages_reserved = 0;
+  size_t size = 0;
+  uint8_t* start = mi_os_claim_huge_pages(pages, &size);
+  if (start == NULL) return NULL; // only happens on 32-bit systems
+
+  // Allocate one page at a time but try to place them contiguously
+  // We allocate one page at a time to be able to abort if it takes too long
+  // or to at least allocate as many as available on the system.
+  mi_msecs_t start_t = _mi_clock_start();
+  size_t page;
+  for (page = 0; page < pages; page++) {
+    // allocate a page
+    void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
+    void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node);
+
+    // Did we succeed at a contiguous address?
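+    // [editor's note] the claimed range is only a hint: if the OS places the
+    // page at a different address, the branch below frees the stray page and
+    // stops, so callers receive one contiguous run of however many huge
+    // pages were actually placed where they were requested.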
+    if (p != addr) {
+      // no success, issue a warning and break
+      if (p != NULL) {
+        _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr);
+        _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
+      }
+      break;
+    }
+
+    // success, record it
+    _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
+    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
+
+    // check for timeout
+    if (max_msecs > 0) {
+      mi_msecs_t elapsed = _mi_clock_end(start_t);
+      if (page >= 1) {
+        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
+        if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
+          elapsed = max_msecs + 1;
+        }
+      }
+      if (elapsed > max_msecs) {
+        _mi_warning_message("huge page allocation timed out\n");
+        break;
+      }
+    }
+  }
+  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
+  if (pages_reserved != NULL) *pages_reserved = page;
+  if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE;
+  return (page == 0 ? NULL : start);
+}
+
+// free every huge page in a range individually (as we allocated per page)
+// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
+void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
+  if (p==NULL || size==0) return;
+  uint8_t* base = (uint8_t*)p;
+  while (size >= MI_HUGE_OS_PAGE_SIZE) {
+    _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats);
+    size -= MI_HUGE_OS_PAGE_SIZE;
+  }
+}
+
+/* ----------------------------------------------------------------------------
+Support NUMA-aware allocation
+-----------------------------------------------------------------------------*/
+#ifdef WIN32
+static size_t mi_os_numa_nodex() {
+  PROCESSOR_NUMBER pnum;
+  USHORT numa_node = 0;
+  GetCurrentProcessorNumberEx(&pnum);
+  GetNumaProcessorNodeEx(&pnum,&numa_node);
+  return numa_node;
+}
+
+static size_t mi_os_numa_node_countx(void) {
+  ULONG numa_max = 0;
+  GetNumaHighestNodeNumber(&numa_max);
+  return (numa_max + 1);
+}
+#elif defined(__linux__)
+#include <sys/syscall.h>  // getcpu
+#include <unistd.h>       // access

+static size_t mi_os_numa_nodex(void) {
+#ifdef SYS_getcpu
+  unsigned long node = 0;
+  unsigned long ncpu = 0;
+  long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
+  if (err != 0) return 0;
+  return node;
+#else
+  return 0;
+#endif
+}
+static size_t mi_os_numa_node_countx(void) {
+  char buf[128];
+  unsigned node = 0;
+  for(node = 0; node < 256; node++) {
+    // enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
+    snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
+    if (access(buf,R_OK) != 0) break;
+  }
+  return (node+1);
+}
+#else
+static size_t mi_os_numa_nodex(void) {
+  return 0;
+}
+static size_t mi_os_numa_node_countx(void) {
+  return 1;
+}
+#endif
+
+size_t _mi_numa_node_count = 0;   // cache the node count
+
+size_t _mi_os_numa_node_count_get(void) {
+  if (mi_unlikely(_mi_numa_node_count <= 0)) {
+    long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
+    if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically
+    _mi_numa_node_count = (size_t)(ncount <= 0 ?
1 : ncount); + _mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count); + } + mi_assert_internal(_mi_numa_node_count >= 1); + return _mi_numa_node_count; +} + +int _mi_os_numa_node_get(mi_os_tld_t* tld) { + UNUSED(tld); + size_t numa_count = _mi_os_numa_node_count(); + if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 + // never more than the node count and >= 0 + size_t numa_node = mi_os_numa_nodex(); + if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } + return (int)numa_node; +} diff --git a/src/page-queue.c b/src/page-queue.c index 95443a69..68e2aaa4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -178,20 +178,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); - mi_heap_t* heap = page->heap; + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); + mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); + mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); return pq; } @@ -246,35 +246,35 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; if (page == queue->first) { queue->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_heap_queue_first_update(heap,queue); } - page->heap->page_count--; + heap->page_count--; page->next = NULL; page->prev = NULL; - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); 
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + mi_assert_internal(page->xblock_size == queue->block_size || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -296,19 +296,19 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || - (page->block_size == to->block_size && mi_page_queue_is_full(from)) || - (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || + (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || + (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; if (page == from->first) { from->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_heap_queue_first_update(heap, from); } @@ -316,14 +316,14 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro page->prev = to->last; page->next = NULL; if (to->last != NULL) { - mi_assert_internal(page->heap == to->last->heap); + mi_assert_internal(heap == mi_page_heap(to->last)); to->last->next = page; to->last = page; } else { to->first = page; to->last = page; - mi_heap_queue_first_update(page->heap, to); + mi_heap_queue_first_update(heap, to); } mi_page_set_in_full(page, mi_page_queue_is_full(to)); @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + mi_page_set_heap(page,heap); count++; } diff --git a/src/page.c b/src/page.c index 437cd0a5..edbc7411 100644 --- a/src/page.c +++ b/src/page.c @@ -29,16 +29,17 @@ terms of the MIT license. 
A copy of the license can be found in the file ----------------------------------------------------------- */ // Index a block in a page -static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); - return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); } -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); - -#if (MI_DEBUG>1) +#if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { size_t count = 0; while (head != NULL) { @@ -69,13 +70,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { } static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->block_size > 0); + mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); + const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -89,10 +91,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif - mi_block_t* tfree = mi_tf_block(page->thread_free); + mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); - size_t tfree_count = mi_page_list_count(page, tfree); - mi_assert_internal(tfree_count <= page->thread_freed + 1); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == page->capacity); @@ -103,42 +105,44 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key != 0); #endif - if (page->heap!=NULL) { + if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); if (segment->page_kind != MI_PAGE_HUGE) { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); - mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + 
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } return true; } #endif - -void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfree; mi_thread_free_t tfreex; - + mi_delayed_t old_delay; do { - tfreex = tfree = page->thread_free; - if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { - tfreex = mi_tf_set_delayed(tfree,delay); - } - else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { + tfree = mi_atomic_read(&page->xthread_free); // note: must acquire as we can break this loop and not do a CAS + tfreex = mi_tf_set_delayed(tfree, delay); + old_delay = mi_tf_delayed(tfree); + if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. - continue; // and try again + // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } - } - while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + else if (delay == old_delay) { + break; // avoid atomic operation if already equal + } + else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { + break; // leave never-delayed flag set + } + } while ((old_delay == MI_DELAYED_FREEING) || + !mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } - /* ----------------------------------------------------------- Page collect the `local_free` and `thread_free` lists ----------------------------------------------------------- */ @@ -153,17 +157,17 @@ static void _mi_page_thread_free_collect(mi_page_t* page) mi_thread_free_t tfree; mi_thread_free_t tfreex; do { - tfree = page->thread_free; + tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); // return if the list is empty if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uintptr_t max_count = page->capacity; // cannot collect more than capacity - uintptr_t count = 1; + uint32_t max_count = page->capacity; // cannot collect more than capacity + uint32_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -172,7 +176,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) } // if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free) if (count > max_count) { - _mi_fatal_error("corrupted thread-free list\n"); + _mi_error_message(EFAULT, "corrupted thread-free list\n"); return; // the thread-free items cannot be freed } @@ -181,7 +185,6 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -189,7 +192,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_assert_internal(page!=NULL); // collect the thread free list - if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation + if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation _mi_page_thread_free_collect(page); } @@ -227,10 +230,12 @@ void 
_mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - _mi_page_free_collect(page,false); - mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); + mi_assert_internal(!page->is_reset); + // TODO: push on full queue immediately if it is full? + mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -238,11 +243,16 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { // allocate a fresh page from a segment static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); - mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); - if (page == NULL) return NULL; + mi_assert_internal(pq==NULL||block_size == pq->block_size); + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); + if (page == NULL) { + // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) + return NULL; + } + // a fresh page was found, initialize it mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, &heap->tld->stats); - _mi_stat_increase( &heap->tld->stats.pages, 1); + mi_page_init(heap, page, block_size, heap->tld); + _mi_stat_increase(&heap->tld->stats.pages, 1); if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -251,22 +261,10 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - - // try to reclaim an abandoned page first - mi_page_t* page = pq->first; - if (!heap->no_reclaim && - _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments) && - page != pq->first) - { - // we reclaimed, and we got lucky with a reclaimed page in our queue - page = pq->first; - if (page->free != NULL) return page; - } - // otherwise allocate the page - page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; - mi_assert_internal(pq->block_size==page->block_size); - mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); return page; } @@ -275,25 +273,24 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ void _mi_heap_delayed_free(mi_heap_t* heap) { - // take over the list + // take over the list (note: no atomic exchange is it is often NULL) mi_block_t* block; do { - block = (mi_block_t*)heap->thread_delayed_free; - } while (block != NULL && 
!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), NULL, block)); + block = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); + } while (block != NULL && !mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, NULL, block)); // and free them all while(block != NULL) { - mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); + mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. mi_block_t* dfree; do { - dfree = (mi_block_t*)heap->thread_delayed_free; - mi_block_set_nextx(heap, block, dfree, heap->cookie); - } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); - + dfree = mi_atomic_read_ptr_relaxed(mi_block_t,&heap->thread_delayed_free); + mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]); + } while (!mi_atomic_cas_ptr_weak(mi_block_t,&heap->thread_delayed_free, block, dfree)); } block = next; } @@ -308,11 +305,9 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE); if (!mi_page_is_in_full(page)) return; - mi_heap_t* heap = page->heap; + mi_heap_t* heap = mi_page_heap(page); mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); @@ -325,10 +320,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE); if (mi_page_is_in_full(page)) return; - - mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); + mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } @@ -341,28 +334,27 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(page->heap != NULL); + mi_assert_internal(mi_page_heap(page) != NULL); -#if MI_DEBUG > 1 - mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); -#endif + mi_heap_t* pheap = mi_page_heap(page); // remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = &pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, NULL); #if MI_DEBUG>1 // check there are no references left.. 
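  // [editor's note] i.e. no block on this heap's thread_delayed_free list may
  // still point into the page being abandoned; the debug-only walk below makes
  // any stale reference trip the assertion before the page leaves the heap.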
-  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) {
+  for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) {
     mi_assert_internal(_mi_ptr_page(block) != page);
   }
 #endif

   // and abandon it
-  mi_assert_internal(page->heap == NULL);
+  mi_assert_internal(mi_page_heap(page) == NULL);
   _mi_segment_page_abandon(page,segments_tld);
 }

@@ -373,36 +365,23 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
   mi_assert_expensive(_mi_page_is_valid(page));
   mi_assert_internal(pq == mi_page_queue_of(page));
   mi_assert_internal(mi_page_all_free(page));
-  #if MI_DEBUG>1
-  // check if we can safely free
-  mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE);
-  free = mi_atomic_exchange(&page->thread_free, free);
-  mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING);
-  #endif
+  mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);

+  // no more aligned blocks in here
   mi_page_set_has_aligned(page, false);

-  // account for huge pages here
-  // (note: no longer necessary as huge pages are always abandoned)
-  if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) {
-    if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
-      _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size);
-    }
-    else {
-      _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
-    }
-  }
-
   // remove from the page list
   // (no need to do _mi_heap_delayed_free first as all blocks are already free)
-  mi_segments_tld_t* segments_tld = &page->heap->tld->segments;
+  mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
   mi_page_queue_remove(pq, page);

   // and free it
-  mi_assert_internal(page->heap == NULL);
+  mi_page_set_heap(page,NULL);
   _mi_segment_page_free(page, force, segments_tld);
 }

+#define MI_MAX_RETIRE_SIZE (4*MI_SMALL_SIZE_MAX)
+
 // Retire a page with no more used blocks
 // Important to not retire too quickly though as new
 // allocations might be coming.
@@ -420,20 +399,40 @@ void _mi_page_retire(mi_page_t* page) {
   // (or we end up retiring and re-allocating most of the time)
   // NOTE: refine this more: we should not retire if this
   // is the only page left with free blocks. It is not clear
-  // how to check this efficiently though...
+  // how to check this efficiently though...
   // for now, we don't retire if it is the only page left of this size class.
   mi_page_queue_t* pq = mi_page_queue_of(page);
-  if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) {
-    // if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
-    if (pq->last==page && pq->first==page) {
+  if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) {
+    if (pq->last==page && pq->first==page) { // the only page in the queue?
       mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
-      return; // dont't retire after all
+      page->retire_expire = 16;
+      mi_assert_internal(mi_page_all_free(page));
+      return; // don't free after all
     }
   }
   _mi_page_free(page, pq, false);
 }

+// free retired pages: we don't need to look at the entire queues
+// since we only retire pages that are the last one in a queue.
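+// [editor's note] `retire_expire` acts as a small countdown: when the last
+// page of a size class becomes empty it is parked above with a count of 16
+// instead of being freed, the function below decrements the count each time
+// it runs and only really frees the page once it reaches zero (or when
+// collection is forced), and the count is cleared again if the page gains
+// live blocks. This smooths alloc/free flip-flops on a size-class boundary.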
+void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { + for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_MAX_RETIRE_SIZE; pq++) { + mi_page_t* page = pq->first; + if (page != NULL && page->retire_expire != 0) { + if (mi_page_all_free(page)) { + page->retire_expire--; + if (force || page->retire_expire == 0) { + _mi_page_free(pq->first, pq, force); + } + } + else { + page->retire_expire = 0; + } + } + } +} + /* ----------------------------------------------------------- Initialize the initial free list in a page. @@ -445,15 +444,15 @@ void _mi_page_retire(mi_page_t* page) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -467,18 +466,19 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); counts[i] = slice_extend; } counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) 
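   // [editor's note] the point of the slices: rather than one sequential run,
   // the new blocks are split into up to MI_MAX_SLICES interleaved slices and
   // the free list is woven through them in random order below, so in secure
   // mode the addresses returned by consecutive allocations are hard to
   // predict; the per-heap random draw above seeds the shuffle.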
// and initialize the free list by randomly threading through them // set up first element - size_t current = _mi_heap_random(heap) % slice_count; + const uintptr_t r = _mi_heap_random_next(heap); + size_t current = r % slice_count; counts[current]--; mi_block_t* const free_start = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; + // and iterate through the rest; use `random_shuffle` for performance + uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0 for (size_t i = 1; i < extend; i++) { // call random_shuffle only every INTPTR_SIZE rounds const size_t round = i%MI_INTPTR_SIZE; @@ -499,10 +499,9 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co // prepend to the free list (usually NULL) mi_block_set_next(page, blocks[current], page->free); // end of the list page->free = free_start; - heap->random = _mi_random_shuffle(rnd); } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE <= 2) @@ -510,18 +509,19 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - const size_t bsize = page->block_size; - mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); - + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); + // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_set_next(page,block,next); block = next; - } + } // prepend to free list (usually `NULL`) mi_block_set_next(page, last, page->free); page->free = start; @@ -543,8 +543,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); @@ -554,12 +553,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); - mi_stat_counter_increase(stats->pages_extended, 1); + uint8_t* page_start = _mi_page_start(_mi_page_segment(page), page, &page_size); + mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count + const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); size_t extend = page->reserved - page->capacity; - size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? 
MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; if (extend > max_extend) { @@ -571,16 +571,22 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend < (1UL<<16)); + // commit on-demand for large and huge pages? + if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + uint8_t* start = page_start + (page->capacity * bsize); + _mi_mem_commit(start, extend * bsize, NULL, &tld->os); + } + // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, stats ); + mi_page_free_list_extend(page, bsize, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, stats); + mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(stats->page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * bsize); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -590,37 +596,40 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st } // Initialize a fresh page -static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) { +static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) { mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert(segment != NULL); mi_assert_internal(block_size > 0); // set fields + mi_page_set_heap(page, heap); size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size); - page->block_size = block_size; + _mi_segment_page_start(segment, page, block_size, &page_size, NULL); + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST - page->cookie = _mi_heap_random(heap) | 1; + page->key[0] = _mi_heap_random_next(heap); + page->key[1] = _mi_heap_random_next(heap); #endif page->is_zero = page->is_zero_init; mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); - mi_assert_internal(page->thread_free == 0); - mi_assert_internal(page->thread_freed == 0); + mi_assert_internal(page->xthread_free == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); + mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->cookie != 0); + mi_assert_internal(page->key[0] != 0); + mi_assert_internal(page->key[1] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,stats); + mi_page_extend_free(heap,page,tld); mi_assert(mi_page_immediate_available(page)); } @@ -630,42 +639,27 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi -------------------------------------------------------------*/ // Find a page with free blocks of `page->block_size`. -static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) +static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; - while( page != NULL) + while (page != NULL) { mi_page_t* next = page->next; // remember next count++; // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page,false); + _mi_page_free_collect(page, false); // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. - // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. Try to extend if (page->capacity < page->reserved) { - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); break; } @@ -673,50 +667,50 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 3. If the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. 
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); - mi_page_to_full(page,pq); + mi_page_to_full(page, pq); page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches,count); - - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } + mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { + _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); + if (page == NULL && first_try) { + // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again + page = mi_page_queue_find_free_ex(heap, pq, false); + } } else { mi_assert(pq->first == page); + page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); return page; } + // Find a page with free blocks of `size`. static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { + if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { // in secure mode, we extend half the time to increase randomness - mi_page_extend_free(heap, page, &heap->tld->stats); + mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } else { _mi_page_free_collect(page,false); } if (mi_page_immediate_available(page)) { + page->retire_expire = 0; return page; // fast path } } - return mi_page_queue_find_free_ex(heap, pq); + return mi_page_queue_find_free_ex(heap, pq, true); } @@ -728,18 +722,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { ----------------------------------------------------------- */ static mi_deferred_free_fun* volatile deferred_free = NULL; +static volatile _Atomic(void*) deferred_arg; // = NULL void _mi_deferred_free(mi_heap_t* heap, bool force) { heap->tld->heartbeat++; if (deferred_free != NULL && !heap->tld->recurse) { heap->tld->recurse = true; - deferred_free(force, heap->tld->heartbeat); + deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(void,&deferred_arg)); heap->tld->recurse = false; } } -void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { +void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept { deferred_free = fn; + mi_atomic_write_ptr(void,&deferred_arg, arg); } @@ -753,17 +749,18 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { // that frees the block can free the whole page and segment directly. 
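+// (editor's note: a hypothetical call sequence for illustration only; the
+// threshold constant is from this file, the size is made up. Because the
+// segment is abandoned up front (thread_id == 0), the freeing thread may
+// differ from the allocating one:
+//
+//   void* p = mi_malloc(8 * 1024 * 1024);   // > MI_LARGE_OBJ_SIZE_MAX: huge path
+//   // ... hand `p` to another thread ...
+//   mi_free(p);                             // frees page and segment directly
+// )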
 static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
   size_t block_size = _mi_os_good_alloc_size(size);
-  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); 
+  mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
   mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size);
   if (page != NULL) {
+    const size_t bsize = mi_page_block_size(page);
     mi_assert_internal(mi_page_immediate_available(page));
-    mi_assert_internal(page->block_size == block_size);
+    mi_assert_internal(bsize >= size);
     mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
     mi_assert_internal(_mi_page_segment(page)->used==1);
     mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
-    mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
+    mi_page_set_heap(page, NULL);
 
-    if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
+    if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
      _mi_stat_increase(&heap->tld->stats.giant, block_size);
      _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
     }
@@ -771,7 +768,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
      _mi_stat_increase(&heap->tld->stats.huge, block_size);
      _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1);
     }
-  } 
+  }
   return page;
 }
 
@@ -798,7 +795,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
   mi_page_t* page;
   if (mi_unlikely(size > MI_LARGE_OBJ_SIZE_MAX)) {
     if (mi_unlikely(size > PTRDIFF_MAX)) {  // we don't allocate more than PTRDIFF_MAX (see )
-      page = NULL;
+      _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes requested)\n", size);
+      return NULL;
     }
     else {
       page = mi_huge_page_alloc(heap,size);
@@ -808,10 +806,13 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
     // otherwise find a page with free blocks in our size segregated queues
     page = mi_find_free_page(heap,size);
   }
-  if (page == NULL) return NULL; // out of memory
+  if (mi_unlikely(page == NULL)) { // out of memory
+    _mi_error_message(ENOMEM, "cannot allocate memory (%zu bytes requested)\n", size);
+    return NULL;
+  }
 
   mi_assert_internal(mi_page_immediate_available(page));
-  mi_assert_internal(page->block_size >= size);
+  mi_assert_internal(mi_page_block_size(page) >= size);
 
   // and try again, this time succeeding! (i.e. this should never recurse)
   return _mi_page_malloc(heap, page, size);
diff --git a/src/random.c b/src/random.c
new file mode 100644
index 00000000..6fef2434
--- /dev/null
+++ b/src/random.c
@@ -0,0 +1,328 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2019, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+
+#include <string.h> // memset
+
+/* ----------------------------------------------------------------------------
+We use our own PRNG to keep predictable performance of random number generation
+and to avoid implementations that use a lock. We only use the OS provided
+random source to initialize the initial seeds. Since we do not need ultimate
+performance but do rely on the security (for secret cookies in secure mode),
+we use a cryptographically secure generator (chacha20).
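+(Editor's note, for orientation: heaps consume this interface through
+`_mi_heap_random_next(heap)`, which is what supplies the free-list keys
+`page->key[0]`/`page->key[1]` and the secure free-list shuffling elsewhere
+in this patch.)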
+-----------------------------------------------------------------------------*/ + +#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance? + + +/* ---------------------------------------------------------------------------- +Chacha20 implementation as the original algorithm with a 64-bit nonce +and counter: https://en.wikipedia.org/wiki/Salsa20 +The input matrix has sixteen 32-bit values: +Position 0 to 3: constant key +Position 4 to 11: the key +Position 12 to 13: the counter. +Position 14 to 15: the nonce. + +The implementation uses regular C code which compiles very well on modern compilers. +(gcc x64 has no register spills, and clang 6+ uses SSE instructions) +-----------------------------------------------------------------------------*/ + +static inline uint32_t rotl(uint32_t x, uint32_t shift) { + return (x << shift) | (x >> (32 - shift)); +} + +static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) { + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12); + x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8); + x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7); +} + +static void chacha_block(mi_random_ctx_t* ctx) +{ + // scramble into `x` + uint32_t x[16]; + for (size_t i = 0; i < 16; i++) { + x[i] = ctx->input[i]; + } + for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) { + qround(x, 0, 4, 8, 12); + qround(x, 1, 5, 9, 13); + qround(x, 2, 6, 10, 14); + qround(x, 3, 7, 11, 15); + qround(x, 0, 5, 10, 15); + qround(x, 1, 6, 11, 12); + qround(x, 2, 7, 8, 13); + qround(x, 3, 4, 9, 14); + } + + // add scrambled data to the initial state + for (size_t i = 0; i < 16; i++) { + ctx->output[i] = x[i] + ctx->input[i]; + } + ctx->output_available = 16; + + // increment the counter for the next round + ctx->input[12] += 1; + if (ctx->input[12] == 0) { + ctx->input[13] += 1; + if (ctx->input[13] == 0) { // and keep increasing into the nonce + ctx->input[14] += 1; + } + } +} + +static uint32_t chacha_next32(mi_random_ctx_t* ctx) { + if (ctx->output_available <= 0) { + chacha_block(ctx); + ctx->output_available = 16; // (assign again to suppress static analysis warning) + } + const uint32_t x = ctx->output[16 - ctx->output_available]; + ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out + ctx->output_available--; + return x; +} + +static inline uint32_t read32(const uint8_t* p, size_t idx32) { + const size_t i = 4*idx32; + return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24); +} + +static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce) +{ + // since we only use chacha for randomness (and not encryption) we + // do not _need_ to read 32-bit values as little endian but we do anyways + // just for being compatible :-) + memset(ctx, 0, sizeof(*ctx)); + for (size_t i = 0; i < 4; i++) { + const uint8_t* sigma = (uint8_t*)"expand 32-byte k"; + ctx->input[i] = read32(sigma,i); + } + for (size_t i = 0; i < 8; i++) { + ctx->input[i + 4] = read32(key,i); + } + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = (uint32_t)nonce; + ctx->input[15] = (uint32_t)(nonce >> 32); +} + +static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) { + memset(ctx_new, 0, sizeof(*ctx_new)); + memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input)); + ctx_new->input[12] = 0; + ctx_new->input[13] = 0; + ctx_new->input[14] = (uint32_t)nonce; + ctx_new->input[15] = (uint32_t)(nonce >> 32); + 
mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
+  chacha_block(ctx_new);
+}
+
+
+/* ----------------------------------------------------------------------------
+Random interface
+-----------------------------------------------------------------------------*/
+
+#if MI_DEBUG>1
+static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
+  return (ctx != NULL && ctx->input[0] != 0);
+}
+#endif
+
+void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
+  mi_assert_internal(mi_random_is_initialized(ctx));
+  mi_assert_internal(ctx != ctx_new);
+  chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
+}
+
+uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
+  mi_assert_internal(mi_random_is_initialized(ctx));
+  #if MI_INTPTR_SIZE <= 4
+    return chacha_next32(ctx);
+  #elif MI_INTPTR_SIZE == 8
+    return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
+  #else
+  # error "define mi_random_next for this platform"
+  #endif
+}
+
+
+/* ----------------------------------------------------------------------------
+To initialize a fresh random context we rely on the OS:
+- Windows     : BCryptGenRandom
+- osX,bsd,wasi: arc4random_buf
+- Linux       : getrandom,/dev/urandom
+If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
+-----------------------------------------------------------------------------*/
+
+#if defined(_WIN32)
+#pragma comment (lib,"bcrypt.lib")
+#include <bcrypt.h>
+static bool os_random_buf(void* buf, size_t buf_len) {
+  return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
+}
+/*
+#define SystemFunction036 NTAPI SystemFunction036
+#include <ntsecapi.h>
+#undef SystemFunction036
+static bool os_random_buf(void* buf, size_t buf_len) {
+  RtlGenRandom(buf, (ULONG)buf_len);
+  return true;
+}
+*/
+#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
+      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+      defined(__wasi__)
+#include <stdlib.h>
+static bool os_random_buf(void* buf, size_t buf_len) {
+  arc4random_buf(buf, buf_len);
+  return true;
+}
+#elif defined(__linux__)
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <errno.h>
+static bool os_random_buf(void* buf, size_t buf_len) {
+  // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
+  // and for the latter the actual `getrandom` call is not always defined.
+  // (see )
+  // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
+#ifdef SYS_getrandom
+  #ifndef GRND_NONBLOCK
+  #define GRND_NONBLOCK (1)
+  #endif
+  static volatile _Atomic(uintptr_t) no_getrandom; // = 0
+  if (mi_atomic_read(&no_getrandom)==0) {
+    ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
+    if (ret >= 0) return (buf_len == (size_t)ret);
+    if (ret != ENOSYS) return false;
+    mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom
+  }
+#endif
+  int flags = O_RDONLY;
+  #if defined(O_CLOEXEC)
+  flags |= O_CLOEXEC;
+  #endif
+  int fd = open("/dev/urandom", flags, 0);
+  if (fd < 0) return false;
+  size_t count = 0;
+  while(count < buf_len) {
+    ssize_t ret = read(fd, (char*)buf + count, buf_len - count);
+    if (ret<=0) {
+      if (errno!=EAGAIN && errno!=EINTR) break;
+    }
+    else {
+      count += ret;
+    }
+  }
+  close(fd);
+  return (count==buf_len);
+}
+#else
+static bool os_random_buf(void* buf, size_t buf_len) {
+  return false;
+}
+#endif
+
+#if defined(_WIN32)
+#include <windows.h>
+#elif defined(__APPLE__)
+#include <mach/mach_time.h>
+#else
+#include <time.h>
+#endif
+
+static uintptr_t os_random_weak(uintptr_t extra_seed) {
+  uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
+  #if defined(_WIN32)
+    LARGE_INTEGER pcount;
+    QueryPerformanceCounter(&pcount);
+    x ^= (uintptr_t)(pcount.QuadPart);
+  #elif defined(__APPLE__)
+    x ^= (uintptr_t)mach_absolute_time();
+  #else
+    struct timespec time;
+    clock_gettime(CLOCK_MONOTONIC, &time);
+    x ^= (uintptr_t)time.tv_sec;
+    x ^= (uintptr_t)time.tv_nsec;
+  #endif
+  // and do a few randomization steps
+  uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
+  for (uintptr_t i = 0; i < max; i++) {
+    x = _mi_random_shuffle(x);
+  }
+  mi_assert_internal(x != 0);
+  return x;
+}
+
+void _mi_random_init(mi_random_ctx_t* ctx) {
+  uint8_t key[32];
+  if (!os_random_buf(key, sizeof(key))) {
+    // if we fail to get random data from the OS, we fall back to a
+    // weak random source based on the current time
+    _mi_warning_message("unable to use secure randomness\n");
+    uintptr_t x = os_random_weak(0);
+    for (size_t i = 0; i < 8; i++) {  // key is eight 32-bit words.
+ x = _mi_random_shuffle(x); + ((uint32_t*)key)[i] = (uint32_t)x; + } + } + chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); +} + +/* -------------------------------------------------------- +test vectors from +----------------------------------------------------------- */ +/* +static bool array_equals(uint32_t* x, uint32_t* y, size_t n) { + for (size_t i = 0; i < n; i++) { + if (x[i] != y[i]) return false; + } + return true; +} +static void chacha_test(void) +{ + uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 }; + uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb }; + qround(x, 0, 1, 2, 3); + mi_assert_internal(array_equals(x, x_out, 4)); + + uint32_t y[16] = { + 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c, + 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 }; + uint32_t y_out[16] = { + 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a, + 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2, + 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963, + 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 }; + qround(y, 2, 7, 8, 13); + mi_assert_internal(array_equals(y, y_out, 16)); + + mi_random_ctx_t r = { + { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574, + 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c, + 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c, + 0x00000001, 0x09000000, 0x4a000000, 0x00000000 }, + {0}, + 0 + }; + uint32_t r_out[16] = { + 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3, + 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3, + 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9, + 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 }; + chacha_block(&r); + mi_assert_internal(array_equals(r.output, r_out, 16)); +} +*/ diff --git a/src/region.c b/src/region.c new file mode 100644 index 00000000..fd7d4544 --- /dev/null +++ b/src/region.c @@ -0,0 +1,496 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- +This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) +and the segment and huge object allocation by mimalloc. There may be multiple +implementations of this (one could be the identity going directly to the OS, +another could be a simple cache etc), but the current one uses large "regions". +In contrast to the rest of mimalloc, the "regions" are shared between threads and +need to be accessed using atomic operations. +We need this memory layer between the raw OS calls because of: +1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order + to reuse memory effectively. +2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of + an OS allocation/free is still (much) too expensive relative to the accesses + in that object :-( (`malloc-large` tests this). This means we need a cheaper + way to reuse memory. +3. This layer allows for NUMA aware allocation. + +Possible issues: +- (2) can potentially be addressed too with a small cache per thread which is much + simpler. Generally though that requires shrinking of huge pages, and may overuse + memory per thread. 
(and is not compatible with `sbrk`).
+- Since the current regions are per-process, we need atomic operations to
+  claim blocks which may be contended
+- In the worst case, we need to search the whole region map (16KiB for 256GiB)
+  linearly. At what point will direct OS calls be faster? Is there a way to
+  do this better without adding too much complexity?
+-----------------------------------------------------------------------------*/
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include <string.h>  // memset
+
+#include "bitmap.inc.c"
+
+// Internal raw OS interface
+size_t  _mi_os_large_page_size();
+bool    _mi_os_protect(void* addr, size_t size);
+bool    _mi_os_unprotect(void* addr, size_t size);
+bool    _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool    _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
+bool    _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
+bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+
+// arena.c
+void    _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
+void*   _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+void*   _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
+
+
+
+// Constants
+#if (MI_INTPTR_SIZE==8)
+#define MI_HEAP_REGION_MAX_SIZE    (256 * GiB)  // 64KiB for the region map
+#elif (MI_INTPTR_SIZE==4)
+#define MI_HEAP_REGION_MAX_SIZE    (3 * GiB)    // ~ KiB for the region map
+#else
+#error "define the maximum heap space allowed for regions on this platform"
+#endif
+
+#define MI_SEGMENT_ALIGN          MI_SEGMENT_SIZE
+
+#define MI_REGION_MAX_BLOCKS      MI_BITMAP_FIELD_BITS
+#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
+#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
+#define MI_REGION_MAX_OBJ_BLOCKS  (MI_REGION_MAX_BLOCKS/4)                    // 64MiB
+#define MI_REGION_MAX_OBJ_SIZE    (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
+
+// Region info
+typedef union mi_region_info_u {
+  uintptr_t value;
+  struct {
+    bool  valid;        // initialized?
+    bool  is_large;     // allocated in fixed large/huge OS pages
+    short numa_node;    // the associated NUMA node (where -1 means no associated node)
+  } x;
+} mi_region_info_t;
+
+
+// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
+// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
+typedef struct mem_region_s {
+  volatile _Atomic(uintptr_t) info;        // mi_region_info_t.value
+  volatile _Atomic(void*)     start;       // start of the memory area
+  mi_bitmap_field_t           in_use;      // bit per in-use block
+  mi_bitmap_field_t           dirty;       // track if non-zero per block
+  mi_bitmap_field_t           commit;      // track if committed per block
+  mi_bitmap_field_t           reset;       // track if reset per block
+  volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena
+  uintptr_t                   padding;     // round to 8 fields
+} mem_region_t;
+
+// The region map
+static mem_region_t regions[MI_REGION_MAX];
+
+// Allocated regions
+static volatile _Atomic(uintptr_t) regions_count; // = 0;
+
+
+/* ----------------------------------------------------------------------------
+Utility functions
+-----------------------------------------------------------------------------*/
+
+// Blocks (of 4MiB) needed for the given size.
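+// (editor's note) e.g. a 5MiB request needs _mi_divide_up(5MiB, 4MiB) = 2
+// blocks, i.e. two bits in the region's bitmap.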
+static size_t mi_region_block_count(size_t size) {
+  return _mi_divide_up(size, MI_SEGMENT_SIZE);
+}
+
+/*
+// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
+static size_t mi_good_commit_size(size_t size) {
+  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
+  return _mi_align_up(size, _mi_os_large_page_size());
+}
+*/
+
+// Return if a pointer points into a region reserved by us.
+bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+  if (p==NULL) return false;
+  size_t count = mi_atomic_read_relaxed(&regions_count);
+  for (size_t i = 0; i < count; i++) {
+    uint8_t* start = mi_atomic_read_ptr_relaxed(uint8_t, &regions[i].start);
+    if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
+  }
+  return false;
+}
+
+
+static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
+  uint8_t* start = mi_atomic_read_ptr(uint8_t, &region->start);
+  mi_assert_internal(start != NULL);
+  return (start + (bit_idx * MI_SEGMENT_SIZE));
+}
+
+static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
+  mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
+  size_t idx = region - regions;
+  mi_assert_internal(&regions[idx] == region);
+  return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
+}
+
+static size_t mi_memid_create_from_arena(size_t arena_memid) {
+  return (arena_memid << 1) | 1;
+}
+
+
+static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
+  if ((id&1)==1) {
+    if (arena_memid != NULL) *arena_memid = (id>>1);
+    return true;
+  }
+  else {
+    size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
+    *bit_idx  = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
+    *region   = &regions[idx];
+    return false;
+  }
+}
+
+
+/* ----------------------------------------------------------------------------
+  Allocate a fresh region from the OS (or an arena)
+-----------------------------------------------------------------------------*/
+
+static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
+{
+  // not out of regions yet?
+  if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;
+
+  // try to allocate a fresh region from the OS
+  bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
+  bool region_large = (commit && allow_large);
+  bool is_zero = false;
+  size_t arena_memid = 0;
+  void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
+  if (start == NULL) return false;
+  mi_assert_internal(!(region_large && !allow_large));
+  mi_assert_internal(!region_large || region_commit);
+
+  // claim a fresh slot
+  const uintptr_t idx = mi_atomic_increment(&regions_count);
+  if (idx >= MI_REGION_MAX) {
+    mi_atomic_decrement(&regions_count);
+    _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
+    _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB));
+    return false;
+  }
+
+  // allocated, initialize and claim the initial blocks
+  mem_region_t* r = &regions[idx];
+  r->arena_memid = arena_memid;
+  mi_atomic_write(&r->in_use, 0);
+  mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
+  mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
+  mi_atomic_write(&r->reset, 0);
+  *bit_idx = 0;
+  mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
+  mi_atomic_write_ptr(uint8_t*,&r->start, start);
+
+  // and share it
+  mi_region_info_t info;
+  info.x.valid = true;
+  info.x.is_large = region_large;
+  info.x.numa_node = (short)_mi_os_numa_node(tld);
+  mi_atomic_write(&r->info, info.value); // now make it available to others
+  *region = r;
+  return true;
+}
+
+/* ----------------------------------------------------------------------------
+  Try to claim blocks in suitable regions
+-----------------------------------------------------------------------------*/
+
+static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
+  // initialized at all?
+  mi_region_info_t info;
+  info.value = mi_atomic_read_relaxed(&region->info);
+  if (info.value==0) return false;
+
+  // numa correct
+  if (numa_node >= 0) {  // use negative numa node to always succeed
+    int rnode = info.x.numa_node;
+    if (rnode >= 0 && rnode != numa_node) return false;
+  }
+
+  // check allow-large
+  if (!allow_large && info.x.is_large) return false;
+
+  return true;
+}
+
+
+static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
+{
+  // try all regions for a free slot
+  const size_t count = mi_atomic_read(&regions_count);
+  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
+  for (size_t visited = 0; visited < count; visited++, idx++) {
+    if (idx >= count) idx = 0;  // wrap around
+    mem_region_t* r = &regions[idx];
+    // if this region suits our demand (numa node matches, large OS page matches)
+    if (mi_region_is_suitable(r, numa_node, allow_large)) {
+      // then try to atomically claim segment(s) in this region
+      if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
+        tld->region_idx = idx;  // remember the last found position
+        *region = r;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
+static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
+  mem_region_t* region;
+  mi_bitmap_index_t bit_idx;
+  const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
+  // try to claim in existing regions
+  if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
+    // otherwise try to allocate a fresh region and claim in there
+    if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
+      // out of regions or memory
+      return NULL;
+    }
+  }
+
+  // ------------------------------------------------
+  // found a region and claimed `blocks` at `bit_idx`, initialize them now
+  mi_assert_internal(region != NULL);
+  mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
+
+  mi_region_info_t info;
+  info.value = mi_atomic_read(&region->info);
+  uint8_t* start = mi_atomic_read_ptr(uint8_t,&region->start);
+  mi_assert_internal(!(info.x.is_large && !*is_large));
+  mi_assert_internal(start != NULL);
+
+  *is_zero = mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
+  *is_large = info.x.is_large;
+  *memid = mi_memid_create(region, bit_idx);
+  void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
+
+  // commit
+  if (*commit) {
+    // ensure commit
+    bool any_uncommitted;
+    mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
+    if (any_uncommitted) {
+      mi_assert_internal(!info.x.is_large);
+      bool commit_zero;
+      _mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld);
+      if (commit_zero) *is_zero = true;
+    }
+  }
+  else {
+    // no need to commit, but check if already fully committed
+    *commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
+  }
+  mi_assert_internal(!*commit || mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
+
+  // unreset reset blocks
+  if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
+    // some blocks are still reset
+    mi_assert_internal(!info.x.is_large);
+    mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
+    mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
+    if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
+      bool reset_zero = false;
+      _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
+      if (reset_zero) *is_zero = true;
+    }
+  }
+  mi_assert_internal(!mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
+
+  #if (MI_DEBUG>=2)
+  if (*commit) { ((uint8_t*)p)[0] = 0; }
+  #endif
+
+  // and return the allocation
+  mi_assert_internal(p != NULL);
+  return p;
+}
+
+
+/* ----------------------------------------------------------------------------
+ Allocation
+-----------------------------------------------------------------------------*/
+
+// Allocate `size` memory aligned at `alignment`. Return non-NULL on success, with a given memory `id`.
+// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+  mi_assert_internal(memid != NULL && tld != NULL);
+  mi_assert_internal(size > 0);
+  *memid = 0;
+  *is_zero = false;
+  bool default_large = false;
+  if (large==NULL) large = &default_large;  // ensure `large != NULL`
+  if (size == 0) return NULL;
+  size = _mi_align_up(size, _mi_os_page_size());
+
+  // allocate from regions if possible
+  void* p = NULL;
+  size_t arena_memid;
+  const size_t blocks = mi_region_block_count(size);
+  if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
+    p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
+    if (p == NULL) {
+      _mi_warning_message("unable to allocate from region: size %zu\n", size);
+    }
+  }
+  if (p == NULL) {
+    // and otherwise fall back to the OS
+    p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
+    *memid = mi_memid_create_from_arena(arena_memid);
+  }
+
+  if (p != NULL) {
+    mi_assert_internal((uintptr_t)p % alignment == 0);
+#if (MI_DEBUG>=2)
+    if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
+#endif
+  }
+  return p;
+}
+
+
+
+/* ----------------------------------------------------------------------------
+Free
+-----------------------------------------------------------------------------*/
+
+// Free previously allocated memory with a given id.
+void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
+  mi_assert_internal(size > 0 && tld != NULL);
+  if (p==NULL) return;
+  if (size==0) return;
+  size = _mi_align_up(size, _mi_os_page_size());
+
+  size_t arena_memid = 0;
+  mi_bitmap_index_t bit_idx;
+  mem_region_t* region;
+  if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
+    // was a direct arena allocation, pass through
+    _mi_arena_free(p, size, arena_memid, tld->stats);
+  }
+  else {
+    // allocated in a region
+    mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
+    const size_t blocks = mi_region_block_count(size);
+    mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
+    mi_region_info_t info;
+    info.value = mi_atomic_read(&region->info);
+    mi_assert_internal(info.value != 0);
+    void* blocks_start = mi_region_blocks_start(region, bit_idx);
+    mi_assert_internal(blocks_start == p); // not a pointer in our area?
+    mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
+    if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
+
+    // committed?
+    if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
+      mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
+    }
+
+    if (any_reset) {
+      // set the is_reset bits if any pages were reset
+      mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
+    }
+
+    // reset the blocks to reduce the working set.
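+    // (editor's note) "reset" hints to the OS that the memory content is no
+    // longer needed (e.g. MADV_FREE on Unix or MEM_RESET on Windows in the
+    // os layer) while the address range itself stays reserved for cheap reuse.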
+    if (!info.x.is_large && mi_option_is_enabled(mi_option_segment_reset)
+       && (mi_option_is_enabled(mi_option_eager_commit) ||
+           mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
+    {
+      bool any_unreset;
+      mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
+      if (any_unreset) {
+        _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
+        _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
+      }
+    }
+
+    // and unclaim
+    bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
+    mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed);
+  }
+}
+
+
+/* ----------------------------------------------------------------------------
+  collection
+-----------------------------------------------------------------------------*/
+void _mi_mem_collect(mi_os_tld_t* tld) {
+  // free every region that has no segments in use.
+  uintptr_t rcount = mi_atomic_read_relaxed(&regions_count);
+  for (size_t i = 0; i < rcount; i++) {
+    mem_region_t* region = &regions[i];
+    if (mi_atomic_read_relaxed(&region->info) != 0) {
+      // if no segments used, try to claim the whole region
+      uintptr_t m;
+      do {
+        m = mi_atomic_read_relaxed(&region->in_use);
+      } while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
+      if (m == 0) {
+        // on success, free the whole region
+        uint8_t* start = mi_atomic_read_ptr(uint8_t,&regions[i].start);
+        size_t arena_memid = mi_atomic_read_relaxed(&regions[i].arena_memid);
+        memset(&regions[i], 0, sizeof(mem_region_t));
+        // and release the whole region
+        mi_atomic_write(&region->info, 0);
+        if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
+          _mi_abandoned_await_readers(); // ensure no pending reads
+          _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
+        }
+      }
+    }
+  }
+}
+
+
+/* ----------------------------------------------------------------------------
+  Other
+-----------------------------------------------------------------------------*/
+
+bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
+  return _mi_os_reset(p, size, tld->stats);
+}
+
+bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
+  return _mi_os_unreset(p, size, is_zero, tld->stats);
+}
+
+bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
+  return _mi_os_commit(p, size, is_zero, tld->stats);
+}
+
+bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
+  return _mi_os_decommit(p, size, tld->stats);
+}
+
+bool _mi_mem_protect(void* p, size_t size) {
+  return _mi_os_protect(p, size);
+}
+
+bool _mi_mem_unprotect(void* p, size_t size) {
+  return _mi_os_unprotect(p, size);
+}
diff --git a/src/segment.c b/src/segment.c
index dcc6a04b..c7a9662b 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -13,36 +13,35 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #define MI_PAGE_HUGE_ALIGN  (256*1024)
 
-/* -----------------------------------------------------------
+static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);
+
+/* --------------------------------------------------------------------------------
   Segment allocation
-  We allocate pages inside big OS allocated "segments"
-  (4mb on 64-bit). This is to avoid splitting VMA's on Linux
-  and reduce fragmentation on other OS's. Each thread
-  owns its own segments.
+  We allocate pages inside bigger "segments" (4mb on 64-bit).
This is to avoid + splitting VMA's on Linux and reduce fragmentation on other OS's. + Each thread owns its own segments. Currently we have: - small pages (64kb), 64 in one segment - medium pages (512kb), 8 in one segment - large pages (4mb), 1 in one segment - - huge blocks > MI_LARGE_OBJ_SIZE_MAX (512kb) are directly allocated by the OS + - huge blocks > MI_LARGE_OBJ_SIZE_MAX become large segment with 1 page - In any case the memory for a segment is virtual and only - committed on demand (i.e. we are careful to not touch the memory - until we actually allocate a block there) + In any case the memory for a segment is virtual and usually committed on demand. + (i.e. we are careful to not touch the memory until we actually allocate a block there) If a thread ends, it "abandons" pages with used blocks and there is an abandoned segment list whose segments can be reclaimed by still running threads, much like work-stealing. ------------------------------------------------------------ */ +-------------------------------------------------------------------------------- */ /* ----------------------------------------------------------- Queue of segments containing free pages ----------------------------------------------------------- */ - -#if (MI_DEBUG>1) -static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) { +#if (MI_DEBUG>=3) +static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_segment_t* segment) { mi_assert_internal(segment != NULL); mi_segment_t* list = queue->first; while (list != NULL) { @@ -89,7 +88,7 @@ static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi else return NULL; } -static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +static mi_segment_queue_t* mi_segment_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { return mi_segment_free_queue_of_kind(segment->page_kind, tld); } @@ -111,8 +110,8 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG > 1) -static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { +#if (MI_DEBUG>=2) +static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); if (in_queue) { @@ -120,52 +119,268 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t } return in_queue; } +#endif -static size_t mi_segment_pagesize(mi_segment_t* segment) { - return ((size_t)1 << segment->page_shift); +static size_t mi_segment_page_size(const mi_segment_t* segment) { + if (segment->capacity > 1) { + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); + return ((size_t)1 << segment->page_shift); + } + else { + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + return segment->segment_size; + } } -static bool mi_segment_is_valid(mi_segment_t* segment) { + + +#if (MI_DEBUG>=2) +static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_t* p = tld->pages_reset.first; + while (p != NULL) { + if (p == page) return true; + p = p->next; + } + return false; +} +#endif + +#if (MI_DEBUG>=3) +static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) { 
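+  // (editor's note) this debug check asserts the per-segment invariants relied
+  // on below: the cookie detects stray or corrupted segment pointers, pages in
+  // use never exceed the capacity, and pages that are in use (or already reset)
+  // are never sitting in the delayed-reset queue.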
mi_assert_internal(segment != NULL); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); size_t nfree = 0; for (size_t i = 0; i < segment->capacity; i++) { - if (!segment->pages[i].segment_in_use) nfree++; + const mi_page_t* const page = &segment->pages[i]; + if (!page->segment_in_use) { + nfree++; + } + if (page->segment_in_use || page->is_reset) { + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + } } mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || - (mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); + (mi_segment_page_size(segment) * segment->capacity == segment->segment_size)); return true; } #endif +static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(page != NULL); + if (page->next != NULL || page->prev != NULL) { + mi_assert_internal(mi_pages_reset_contains(page, tld)); + return false; + } + else { + // both next and prev are NULL, check for singleton list + return (tld->pages_reset.first != page && tld->pages_reset.last != page); + } +} + + +/* ----------------------------------------------------------- + Guard pages +----------------------------------------------------------- */ + +static void mi_segment_protect_range(void* p, size_t size, bool protect) { + if (protect) { + _mi_mem_protect(p, size); + } + else { + _mi_mem_unprotect(p, size); + } +} + +static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* tld) { + // add/remove guard pages + if (MI_SECURE != 0) { + // in secure mode, we set up a protected page in between the segment info and the page data + const size_t os_page_size = _mi_os_page_size(); + mi_assert_internal((segment->segment_info_size - os_page_size) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); + mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_page_size == 0); + mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect); + if (MI_SECURE <= 1 || segment->capacity == 1) { + // and protect the last (or only) page too + mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE); + uint8_t* start = (uint8_t*)segment + segment->segment_size - os_page_size; + if (protect && !mi_option_is_enabled(mi_option_eager_page_commit)) { + // ensure secure page is committed + _mi_mem_commit(start, os_page_size, NULL, tld); + } + mi_segment_protect_range(start, os_page_size, protect); + } + else { + // or protect every page + const size_t page_size = mi_segment_page_size(segment); + for (size_t i = 0; i < segment->capacity; i++) { + if (segment->pages[i].is_committed) { + mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size, protect); + } + } + } + } +} + +/* ----------------------------------------------------------- + Page reset +----------------------------------------------------------- */ + +static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { + mi_assert_internal(page->is_committed); + if (!mi_option_is_enabled(mi_option_page_reset)) return; + if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return; + size_t psize; + void* start = 
mi_segment_raw_page_start(segment, page, &psize);
+  page->is_reset = true;
+  mi_assert_internal(size <= psize);
+  size_t reset_size = (size == 0 || size > psize ? psize : size);
+  if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) {
+    mi_assert_internal(page->xblock_size > 0);
+    reset_size = page->capacity * mi_page_block_size(page);
+  }
+  _mi_mem_reset(start, reset_size, tld->os);
+}
+
+static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld)
+{
+  mi_assert_internal(page->is_reset);
+  mi_assert_internal(!segment->mem_is_fixed);
+  page->is_reset = false;
+  size_t psize;
+  uint8_t* start = mi_segment_raw_page_start(segment, page, &psize);
+  size_t unreset_size = (size == 0 || size > psize ? psize : size);
+  if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) {
+    mi_assert_internal(page->xblock_size > 0);
+    unreset_size = page->capacity * mi_page_block_size(page);
+  }
+  bool is_zero = false;
+  _mi_mem_unreset(start, unreset_size, &is_zero, tld->os);
+  if (is_zero) page->is_zero_init = true;
+}
+
+
+/* -----------------------------------------------------------
+  The free page queue
+----------------------------------------------------------- */
+
+// we re-use the `used` field for the expiration counter. Since this is a
+// 32-bit field while the clock is always 64-bit, we need to guard against
+// overflow; we use subtraction to check for expiry, which works as long as
+// the reset delay is under (2^30 - 1) milliseconds (~12 days).
+// (for example, an expire value stored as -2 (0xFFFFFFFE) with `now`
+// truncated to 3 gives 3 - (-2) = 5 >= 0, i.e. expired, where an unsigned
+// compare would wrongly consider it far in the future)
+static void mi_page_reset_set_expire(mi_page_t* page) {
+  uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_reset_delay);
+  page->used = expire;
+}
+
+static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) {
+  int32_t expire = (int32_t)(page->used);
+  return (((int32_t)now - expire) >= 0);
+}
+
+static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
+  mi_assert_internal(!page->segment_in_use || !page->is_committed);
+  mi_assert_internal(mi_page_not_in_queue(page,tld));
+  mi_assert_expensive(!mi_pages_reset_contains(page, tld));
+  mi_assert_internal(_mi_page_segment(page)==segment);
+  if (!mi_option_is_enabled(mi_option_page_reset)) return;
+  if (segment->mem_is_fixed || page->segment_in_use || !page->is_committed || page->is_reset) return;
+
+  if (mi_option_get(mi_option_reset_delay) == 0) {
+    // reset immediately?
+ mi_page_reset(segment, page, 0, tld); + } + else { + // otherwise push on the delayed page reset queue + mi_page_queue_t* pq = &tld->pages_reset; + // push on top + mi_page_reset_set_expire(page); + page->next = pq->first; + page->prev = NULL; + if (pq->first == NULL) { + mi_assert_internal(pq->last == NULL); + pq->first = pq->last = page; + } + else { + pq->first->prev = page; + pq->first = page; + } + } +} + +static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { + if (mi_page_not_in_queue(page,tld)) return; + + mi_page_queue_t* pq = &tld->pages_reset; + mi_assert_internal(pq!=NULL); + mi_assert_internal(!page->segment_in_use); + mi_assert_internal(mi_pages_reset_contains(page, tld)); + if (page->prev != NULL) page->prev->next = page->next; + if (page->next != NULL) page->next->prev = page->prev; + if (page == pq->last) pq->last = page->prev; + if (page == pq->first) pq->first = page->next; + page->next = page->prev = NULL; + page->used = 0; +} + +static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) { + if (segment->mem_is_fixed) return; // never reset in huge OS pages + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->segment_in_use && page->is_committed && !page->is_reset) { + mi_pages_reset_remove(page, tld); + if (force_reset) { + mi_page_reset(segment, page, 0, tld); + } + } + else { + mi_assert_internal(mi_page_not_in_queue(page,tld)); + } + } +} + +static void mi_reset_delayed(mi_segments_tld_t* tld) { + if (!mi_option_is_enabled(mi_option_page_reset)) return; + mi_msecs_t now = _mi_clock_now(); + mi_page_queue_t* pq = &tld->pages_reset; + // from oldest up to the first that has not expired yet + mi_page_t* page = pq->last; + while (page != NULL && mi_page_reset_is_expired(page,now)) { + mi_page_t* const prev = page->prev; // save previous field + mi_page_reset(_mi_page_segment(page), page, 0, tld); + page->used = 0; + page->prev = page->next = NULL; + page = prev; + } + // discard the reset pages from the queue + pq->last = page; + if (page != NULL){ + page->next = NULL; + } + else { + pq->first = NULL; + } +} + + /* ----------------------------------------------------------- Segment size calculations ----------------------------------------------------------- */ -// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) -{ +// Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +// The raw start is not taking aligned block allocation into consideration. +static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { size_t psize = (segment->page_kind == MI_PAGE_HUGE ? 
segment->segment_size : (size_t)1 << segment->page_shift); - uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; + uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { // the first page starts after the segment info (and possible guard page) - p += segment->segment_info_size; + p += segment->segment_info_size; psize -= segment->segment_info_size; - // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) - if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { - size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { - p += adjust; - psize -= adjust; - } - mi_assert_internal((uintptr_t)p % block_size == 0); - } } - + if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { // secure == 1: the last page has an os guard page at the end // secure > 1: every page has an os guard page @@ -173,19 +388,36 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(_mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { - /* - if (mi_option_is_enabled(mi_option_secure)) { - // always reserve maximally so the protection falls on - // the same address area, as we need to reuse them from the caches interchangably. - capacity = MI_SMALL_PAGES_PER_SEGMENT; +// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +{ + size_t psize; + uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); + if (pre_size != NULL) *pre_size = 0; + if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + size_t adjust = block_size - ((uintptr_t)p % block_size); + if (adjust < block_size) { + p += adjust; + psize -= adjust; + if (pre_size != NULL) *pre_size = adjust; + } + mi_assert_internal((uintptr_t)p % block_size == 0); } - */ + + if (page_size != NULL) *page_size = psize; + mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_segment(p) == segment); + return p; +} + +static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) +{ const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; size_t isize = 0; @@ -230,9 +462,24 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se mi_segments_track_size(-((long)segment_size),tld); if (MI_SECURE != 0) { mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set + mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - _mi_mem_free(segment, segment_size, segment->memid, tld->stats); + + bool any_reset = false; + bool fully_committed = true; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (!page->is_committed) { fully_committed = false; } + if 
(page->is_reset) { any_reset = true; } + } + if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { + fully_committed = false; + } + if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { + fully_committed = false; + } + + _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } @@ -252,13 +499,13 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t return segment; } -static bool mi_segment_cache_full(mi_segments_tld_t* tld) +static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread + // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread size_t max_cache = mi_option_get(mi_option_segment_cache); if (tld->cache_count < max_cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { + ) { return false; } // take the opportunity to reduce the segment cache if it is too large (now) @@ -278,9 +525,6 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) return false; } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) { - _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); - } segment->next = tld->cache; tld->cache = segment; tld->cache_count++; @@ -296,6 +540,12 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { } mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache == NULL); +#if MI_DEBUG>=2 + if (!_mi_is_main_thread()) { + mi_assert_internal(tld->pages_reset.first == NULL); + mi_assert_internal(tld->pages_reset.last == NULL); + } +#endif } @@ -304,8 +554,11 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) { ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + // the segment parameter is non-null if it came from our cache + mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); + // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { @@ -323,47 +576,58 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t pre_size; size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); mi_assert_internal(segment_size >= required); - size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); + + // Initialize parameters + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); + bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); + bool pages_still_good = false; + bool is_zero = false; // Try to get it from our thread local cache first - bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); - bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager || (page_kind > MI_PAGE_MEDIUM); - bool protection_still_good = false; - bool is_zero = false; - mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_fixed); - if (segment->page_kind != page_kind) { - _mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs - } - else { - protection_still_good = true; // otherwise, the guard pages are still in place - } + // came from cache + mi_assert_internal(segment->segment_size == segment_size); + if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { + pages_still_good = true; } - if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) { - mi_assert_internal(!segment->mem_is_fixed); - _mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats); - segment->mem_is_committed = true; - } - if (!segment->mem_is_fixed && - (mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { - bool reset_zero = false; - _mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); - if (reset_zero) is_zero = true; + else + { + if (MI_SECURE!=0) { + mi_assert_internal(!segment->mem_is_fixed); + mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs + } + // different page kinds; unreset any reset pages, and unprotect + // TODO: optimize cache pop to return fitting pages if possible? + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->is_reset) { + if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { + page->is_reset = false; + } + else { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? 
(instead of the full page) + } + } + } + // ensure the initial info is committed + if (segment->capacity < capacity) { + bool commit_zero = false; + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); + if (commit_zero) is_zero = true; + } } } else { // Allocate the segment from the OS size_t memid; - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy + bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (!commit) { // ensure the initial info is committed bool commit_zero = false; - _mi_mem_commit(segment, info_size, &commit_zero, tld->stats); + _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); if (commit_zero) is_zero = true; } segment->memid = memid; @@ -373,28 +637,24 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, } mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t,next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); + if (!pages_still_good) { + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, info_size - ofs); - // guard pages - if ((MI_SECURE != 0) && !protection_still_good) { - // in secure mode, we set up a protected page in between the segment info - // and the page data - mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); - _mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); - size_t os_page_size = _mi_os_page_size(); - if (MI_SECURE <= 1) { - // and protect the last page too - _mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size ); - } - else { - // protect every page - for (size_t i = 0; i < capacity; i++) { - _mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); - } + // initialize pages info + for (uint8_t i = 0; i < capacity; i++) { + segment->pages[i].segment_idx = i; + segment->pages[i].is_reset = false; + segment->pages[i].is_committed = commit; + segment->pages[i].is_zero_init = is_zero; } } + else { + // zero the segment info but not the pages info (and mem fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); + } // initialize segment->page_kind = page_kind; @@ -404,40 +664,37 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - for (uint8_t i = 0; i < segment->capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; + // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + + // set protection + mi_segment_protect(segment, true, tld->os); + + // insert in free lists for small and medium pages + if (page_kind <= MI_PAGE_MEDIUM) { + mi_segment_insert_in_free_queue(segment, tld); } - _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); + //fprintf(stderr,"mimalloc: alloc segment at %p\n", 
(void*)segment); return segment; } +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_init(NULL, required, page_kind, page_shift, tld, os_tld); +} static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { UNUSED(force); - //fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment); - mi_assert(segment != NULL); + mi_assert(segment != NULL); + // note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse) + bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset)); + mi_pages_reset_remove_all_in_segment(segment, force_reset, tld); mi_segment_remove_from_free_queue(segment,tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - - // update reset memory statistics - /* - for (uint8_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - page->is_reset = false; - mi_stat_decrease( tld->stats->reset,mi_page_size(page)); - } - } - */ + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); if (!force && mi_segment_cache_push(segment, tld)) { // it is put in our cache @@ -457,35 +714,38 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { return (segment->used < segment->capacity); } -static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { - mi_assert_internal(mi_segment_has_free(segment)); - mi_assert_expensive(mi_segment_is_valid(segment)); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use) { - if (page->is_reset || !page->is_committed) { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_committed = true; - bool is_zero = false; - _mi_mem_commit(start,psize,&is_zero,stats); - if (is_zero) page->is_zero_init = true; - } - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_fixed); - page->is_reset = false; - bool is_zero = false; - _mi_mem_unreset(start, psize, &is_zero, stats); - if (is_zero) page->is_zero_init = true; - } - } - return page; +static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(_mi_page_segment(page) == segment); + mi_assert_internal(!page->segment_in_use); + // set in-use before doing unreset to prevent delayed reset + mi_pages_reset_remove(page, tld); + page->segment_in_use = true; + segment->used++; + if (!page->is_committed) { + mi_assert_internal(!segment->mem_is_fixed); + mi_assert_internal(!page->is_reset); + page->is_committed = true; + if (segment->page_kind < MI_PAGE_LARGE + || !mi_option_is_enabled(mi_option_eager_page_commit)) { + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + bool is_zero = false; + const size_t gsize = (MI_SECURE >= 2 ? 
_mi_os_page_size() : 0); + _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); + if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } + if (is_zero) { page->is_zero_init = true; } } } - mi_assert(false); - return NULL; + if (page->is_reset) { + mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? + } + mi_assert_internal(page->segment_in_use); + mi_assert_internal(segment->used <= segment->capacity); + if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { + // if no more free pages, remove from the queue + mi_assert_internal(!mi_segment_has_free(segment)); + mi_segment_remove_from_free_queue(segment, tld); + } } @@ -495,39 +755,52 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { - UNUSED(stats); +// clear page data; can be called on abandoned segments +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) +{ mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - size_t inuse = page->capacity * page->block_size; - _mi_stat_decrease(&stats->page_committed, inuse); - _mi_stat_decrease(&stats->pages, 1); - - // reset the page memory to reduce memory pressure? - if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { - size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; - _mi_mem_reset(start, psize, stats); - } + mi_assert_internal(mi_page_not_in_queue(page, tld)); + + size_t inuse = page->capacity * mi_page_block_size(page); + _mi_stat_decrease(&tld->stats->page_committed, inuse); + _mi_stat_decrease(&tld->stats->pages, 1); + + // calculate the used size from the raw (non-aligned) start of the page + //size_t pre_size; + //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); + //size_t used_size = pre_size + (page->capacity * page->block_size); - // zero the page data, but not the segment fields page->is_zero_init = false; + page->segment_in_use = false; + + // reset the page memory to reduce memory pressure? 
+  // note: must come after setting `segment_in_use` to false but before block_size becomes 0
+  //mi_page_reset(segment, page, 0 /*used_size*/, tld);
+
+  // zero the page data, but not the segment fields and block_size (for page size calculations)
+  uint32_t block_size = page->xblock_size;
   ptrdiff_t ofs = offsetof(mi_page_t,capacity);
   memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
-  page->segment_in_use = false;
+  page->xblock_size = block_size;
   segment->used--;
+
+  // add to the free page list for reuse/reset
+  if (allow_reset) {
+    mi_pages_reset_add(segment, page, tld);
+  }
 }
 
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
 {
   mi_assert(page != NULL);
   mi_segment_t* segment = _mi_page_segment(page);
-  mi_assert_expensive(mi_segment_is_valid(segment));
+  mi_assert_expensive(mi_segment_is_valid(segment,tld));
+  mi_reset_delayed(tld);
 
   // mark it as free now
-  mi_segment_page_clear(segment, page, tld->stats);
+  mi_segment_page_clear(segment, page, true, tld);
 
   if (segment->used == 0) {
     // no more used pages; remove from the free list and free the segment
@@ -548,111 +821,377 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
 
 
 /* -----------------------------------------------------------
-   Abandonment
+Abandonment
+
+When threads terminate, they can leave segments with
+live blocks (reached through other threads). Such segments
+are "abandoned" and will be reclaimed by other threads to
+reuse their pages and/or free them eventually.
+
+We maintain a global list of abandoned segments that are
+reclaimed on demand. Since this is shared among threads,
+the implementation needs to avoid the A-B-A problem on
+popping abandoned segments:
+We use tagged pointers to avoid accidentally identifying
+reused segments, much like stamped references in Java.
+Secondly, we maintain a reader counter to avoid resetting
+or decommitting segments that have a pending read operation.
+
+Note: the current implementation is one possible design;
+another way might be to keep track of abandoned segments
+in the regions. This would have the advantage of keeping
+all concurrent code in one place and not needing to deal
+with ABA issues. The drawback is that it is unclear how to
+scan abandoned segments efficiently in that case as they
+would be spread among all other segments in the regions.
 ----------------------------------------------------------- */
 
-// When threads terminate, they can leave segments with
-// live blocks (reached through other threads). Such segments
-// are "abandoned" and will be reclaimed by other threads to
-// reuse their pages and/or free them eventually
-static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
-static volatile _Atomic(uintptr_t) abandoned_count; // = 0;
+// Use the bottom 20 bits (on 64-bit) of the aligned segment pointers
+// to put in a tag that increments on update to avoid the A-B-A problem.
+#define MI_TAGGED_MASK MI_SEGMENT_MASK
+typedef uintptr_t mi_tagged_segment_t;
+
+static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) {
+  return (mi_segment_t*)(ts & ~MI_TAGGED_MASK);
+}
+
+static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) {
+  mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0);
+  uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK;
+  return ((uintptr_t)segment | tag);
+}
+
+// This is a list of visited abandoned segments that were full at the time.
+// This list migrates to `abandoned` when that becomes NULL.
The use of
+// this list reduces contention and the rate at which segments are visited.
+static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
+
+// The abandoned segment list (tagged as it supports pop)
+static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
+
+// We also maintain a count of current readers of the abandoned list
+// in order to prevent resetting/decommitting segment memory if it might
+// still be read.
+static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
+
+// Push on the visited list
+static void mi_abandoned_visited_push(mi_segment_t* segment) {
+  mi_assert_internal(segment->thread_id == 0);
+  mi_assert_internal(segment->abandoned_next == NULL);
+  mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+  mi_assert_internal(segment->used > 0);
+  mi_segment_t* anext;
+  do {
+    anext = mi_atomic_read_ptr_relaxed(mi_segment_t, &abandoned_visited);
+    segment->abandoned_next = anext;
+  } while (!mi_atomic_cas_ptr_weak(mi_segment_t, &abandoned_visited, segment, anext));
+}
+
+// Move the visited list to the abandoned list.
+static bool mi_abandoned_visited_revisit(void)
+{
+  // quick check if the visited list is empty
+  if (mi_atomic_read_ptr_relaxed(mi_segment_t,&abandoned_visited)==NULL) return false;
+
+  // grab the whole visited list
+  mi_segment_t* first = mi_atomic_exchange_ptr(mi_segment_t, &abandoned_visited, NULL);
+  if (first == NULL) return false;
+
+  // first try to swap directly if the abandoned list happens to be NULL
+  const mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
+  mi_tagged_segment_t afirst;
+  if (mi_tagged_segment_ptr(ts)==NULL) {
+    afirst = mi_tagged_segment(first, ts);
+    if (mi_atomic_cas_strong(&abandoned, afirst, ts)) return true;
+  }
+
+  // find the last element of the visited list: O(n)
+  mi_segment_t* last = first;
+  while (last->abandoned_next != NULL) {
+    last = last->abandoned_next;
+  }
+
+  // and atomically prepend to the abandoned list
+  // (no need to increase the readers as we don't access the abandoned segments)
+  mi_tagged_segment_t anext;
+  do {
+    anext = mi_atomic_read_relaxed(&abandoned);
+    last->abandoned_next = mi_tagged_segment_ptr(anext);
+    afirst = mi_tagged_segment(first, anext);
+  } while (!mi_atomic_cas_weak(&abandoned, afirst, anext));
+  return true;
+}
+
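+// [illustrative sketch, not part of this patch] the tagged-pointer scheme
+// above, reduced to a standalone C11 lock-free stack; all names here are
+// hypothetical. Aligned nodes leave their low pointer bits zero, so those
+// bits can carry a counter that is bumped on every update; a stale CAS then
+// fails even if the same address reappears at the top (the A-B-A case):
+//
+//   #include <stdatomic.h>
+//   #include <stdint.h>
+//
+//   typedef struct node_s { struct node_s* next; } node_t;
+//   #define TAG_MASK ((uintptr_t)0xFFFFF)   // low 20 bits; nodes 1MiB-aligned
+//
+//   static node_t* tag_ptr(uintptr_t ts) { return (node_t*)(ts & ~TAG_MASK); }
+//   static uintptr_t tag_make(node_t* p, uintptr_t old_ts) {
+//     return (uintptr_t)p | (((old_ts & TAG_MASK) + 1) & TAG_MASK); // bump tag
+//   }
+//
+//   static _Atomic(uintptr_t) top;  // tagged head of the stack
+//
+//   static void push(node_t* n) {
+//     uintptr_t ts = atomic_load_explicit(&top, memory_order_relaxed);
+//     do {
+//       n->next = tag_ptr(ts);  // ts is refreshed by a failing CAS
+//     } while (!atomic_compare_exchange_weak(&top, &ts, tag_make(n, ts)));
+//   }
+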
+// Push on the abandoned list.
+static void mi_abandoned_push(mi_segment_t* segment) {
+  mi_assert_internal(segment->thread_id == 0);
+  mi_assert_internal(segment->abandoned_next == NULL);
+  mi_assert_internal(segment->next == NULL && segment->prev == NULL);
+  mi_assert_internal(segment->used > 0);
+  mi_tagged_segment_t ts;
+  mi_tagged_segment_t next;
+  do {
+    ts = mi_atomic_read_relaxed(&abandoned);
+    segment->abandoned_next = mi_tagged_segment_ptr(ts);
+    next = mi_tagged_segment(segment, ts);
+  } while (!mi_atomic_cas_weak(&abandoned, next, ts));
+}
+
+// Wait until there are no more pending reads on segments that used to be in the abandoned list
+void _mi_abandoned_await_readers(void) {
+  uintptr_t n;
+  do {
+    n = mi_atomic_read(&abandoned_readers);
+    if (n != 0) mi_atomic_yield();
+  } while (n != 0);
+}
+
+// Pop from the abandoned list
+static mi_segment_t* mi_abandoned_pop(void) {
+  mi_segment_t* segment;
+  // Check efficiently if it is empty (or if the visited list needs to be moved)
+  mi_tagged_segment_t ts = mi_atomic_read_relaxed(&abandoned);
+  segment = mi_tagged_segment_ptr(ts);
+  if (mi_likely(segment == NULL)) {
+    if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL
+      return NULL;
+    }
+  }
+
+  // Do a pop. We use a reader count to prevent a segment from being
+  // decommitted while a read is still pending, and a tagged pointer
+  // to prevent A-B-A link corruption.
+  // (this is called from `memory.c:_mi_mem_free` for example)
+  mi_atomic_increment(&abandoned_readers);  // ensure no segment gets decommitted
+  mi_tagged_segment_t next = 0;
+  do {
+    ts = mi_atomic_read_relaxed(&abandoned);
+    segment = mi_tagged_segment_ptr(ts);
+    if (segment != NULL) {
+      next = mi_tagged_segment(segment->abandoned_next, ts); // note: reads the segment's `abandoned_next` field, so it should not be decommitted
+    }
+  } while (segment != NULL && !mi_atomic_cas_weak(&abandoned, next, ts));
+  mi_atomic_decrement(&abandoned_readers);  // release reader lock
+  if (segment != NULL) {
+    segment->abandoned_next = NULL;
+  }
+  return segment;
+}
+
+/* -----------------------------------------------------------
+   Abandon segment/page
+----------------------------------------------------------- */
+
 static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(segment->used == segment->abandoned);
   mi_assert_internal(segment->used > 0);
   mi_assert_internal(segment->abandoned_next == NULL);
-  mi_assert_expensive(mi_segment_is_valid(segment));
+  mi_assert_expensive(mi_segment_is_valid(segment, tld));
 
   // remove the segment from the free page queue if needed
-  mi_segment_remove_from_free_queue(segment,tld);
+  mi_reset_delayed(tld);
+  mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld);
+  mi_segment_remove_from_free_queue(segment, tld);
   mi_assert_internal(segment->next == NULL && segment->prev == NULL);
 
   // all pages in the segment are abandoned; add it to the abandoned list
   _mi_stat_increase(&tld->stats->segments_abandoned, 1);
   mi_segments_track_size(-((long)segment->segment_size), tld);
   segment->thread_id = 0;
-  mi_segment_t* next;
-  do {
-    next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned));
-    mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next);
-  } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next));
-  mi_atomic_increment(&abandoned_count);
+  segment->abandoned_next = NULL;
+  segment->abandoned_visits = 0;
+  mi_abandoned_push(segment);
 }
 
 void
_mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(mi_segment_is_valid(segment)); - segment->abandoned++; + mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); mi_assert_internal(segment->abandoned <= segment->used); if (segment->used == segment->abandoned) { // all pages are abandoned, abandon the entire segment - mi_segment_abandon(segment,tld); + mi_segment_abandon(segment, tld); } } -bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { - uintptr_t reclaimed = 0; - uintptr_t atmost; - if (try_all) { - atmost = abandoned_count+16; // close enough - } - else { - atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) - if (atmost < 8) atmost = 8; // but at least 8 - } +/* ----------------------------------------------------------- + Reclaim abandoned pages +----------------------------------------------------------- */ - // for `atmost` `reclaimed` abandoned segments... - while(atmost > reclaimed) { - // try to claim the head of the abandoned segments - mi_segment_t* segment; - do { - segment = (mi_segment_t*)abandoned; - } while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment)); - if (segment==NULL) break; // stop early if no more segments available - - // got it. - mi_atomic_decrement(&abandoned_count); - segment->thread_id = _mi_thread_id(); - segment->abandoned_next = NULL; - mi_segments_track_size((long)segment->segment_size,tld); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_expensive(mi_segment_is_valid(segment)); - _mi_stat_decrease(&tld->stats->segments_abandoned,1); - - // add its abandoned pages to the current thread - mi_assert(segment->abandoned == segment->used); - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->segment_in_use) { - segment->abandoned--; - mi_assert(page->next == NULL); - _mi_stat_decrease(&tld->stats->pages_abandoned, 1); - if (mi_page_all_free(page)) { - // if everything free by now, free the page - mi_segment_page_clear(segment,page,tld->stats); - } - else { - // otherwise reclaim it - _mi_page_reclaim(heap,page); - } +// Possibly clear pages and check if free space is available +static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) +{ + mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); + bool has_page = false; + size_t pages_used = 0; + size_t pages_used_empty = 0; + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + pages_used++; + // ensure used count is up to date and collect potential concurrent frees + _mi_page_free_collect(page, false); + if (mi_page_all_free(page)) { + // if everything free already, page can be reused for some block size + // note: don't clear the page yet as we can only OS reset it once it is reclaimed + pages_used_empty++; + has_page = true; + } + else if (page->xblock_size == block_size && mi_page_has_any_available(page)) { + // a page has available free blocks of the right size + has_page = true; } - } - 
mi_assert(segment->abandoned == 0); - if (segment->used == 0) { // due to page_clear - mi_segment_free(segment,false,tld); } else { - reclaimed++; - // add its free pages to the the current thread free small segment queue - if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { - mi_segment_insert_in_free_queue(segment,tld); + // whole empty page + has_page = true; + } + } + mi_assert_internal(pages_used == segment->used && pages_used >= pages_used_empty); + if (all_pages_free != NULL) { + *all_pages_free = ((pages_used - pages_used_empty) == 0); + } + return has_page; +} + + +// Reclaim a segment; returns NULL if the segment was freed +// set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. +static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { + mi_assert_internal(segment->abandoned_next == NULL); + if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } + + segment->thread_id = _mi_thread_id(); + segment->abandoned_visits = 0; + mi_segments_track_size((long)segment->segment_size, tld); + mi_assert_internal(segment->next == NULL && segment->prev == NULL); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); + _mi_stat_decrease(&tld->stats->segments_abandoned, 1); + + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + mi_assert_internal(!page->is_reset); + mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_not_in_queue(page, tld)); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); + segment->abandoned--; + mi_assert(page->next == NULL); + _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + // set the heap again and allow heap thread delayed free again. + mi_page_set_heap(page, heap); + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + // TODO: should we not collect again given that we just collected in `check_free`? 
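+      // [illustrative note, not part of this patch] per used page the steps below are:
+      //   1. mi_page_set_heap(page, heap)        -- re-own the page (done above)
+      //   2. _mi_page_use_delayed_free(..)       -- valid only once the heap is set
+      //   3. _mi_page_free_collect(page, false)  -- absorb frees that raced with abandonment
+      //   4. all blocks free? clear the page for reuse : reclaim it into the heap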
+      _mi_page_free_collect(page, false); // ensure used count is up to date
+      if (mi_page_all_free(page)) {
+        // if everything free already, clear the page directly
+        mi_segment_page_clear(segment, page, true, tld);  // reset is ok now
+      }
+      else {
+        // otherwise reclaim it into the heap
+        _mi_page_reclaim(heap, page);
+        if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) {
+          if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; }
+        }
+      }
+    }
+    else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet
+      // note: do not reset as this includes pages that were not touched before
+      // mi_pages_reset_add(segment, page, tld);
+    }
+  }
+  mi_assert_internal(segment->abandoned == 0);
+  if (segment->used == 0) {
+    mi_assert_internal(right_page_reclaimed == NULL || !(*right_page_reclaimed));
+    mi_segment_free(segment, false, tld);
+    return NULL;
+  }
+  else {
+    if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) {
+      mi_segment_insert_in_free_queue(segment, tld);
+    }
+    return segment;
+  }
+}
+
+
+void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
+  mi_segment_t* segment;
+  while ((segment = mi_abandoned_pop()) != NULL) {
+    mi_segment_reclaim(segment, heap, 0, NULL, tld);
+  }
+}
+
+static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld)
+{
+  *reclaimed = false;
+  mi_segment_t* segment;
+  int max_tries = 8; // limit the work to bound allocation times
+  while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
+    segment->abandoned_visits++;
+    bool all_pages_free;
+    bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees)
+    if (all_pages_free) {
+      // free the segment (by forced reclaim) to make it available to other threads.
+      // note1: we prefer to free a segment as that might lead to reclaiming another
+      // segment that is still partially used.
+      // note2: we could in principle optimize this by skipping reclaim and directly
+      // freeing, but that would temporarily violate some invariants.
+      mi_segment_reclaim(segment, heap, 0, NULL, tld);
+    }
+    else if (has_page && segment->page_kind == page_kind) {
+      // found a free page of the right kind, or a page of the right block_size with free space;
+      // we return the result of reclaim (which is usually `segment`) as it might free
+      // the segment due to concurrent frees (in which case `NULL` is returned).
+      return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
+    }
+    else if (segment->abandoned_visits >= 3) {
+      // always reclaim on the 3rd visit to limit the list length.
+      mi_segment_reclaim(segment, heap, 0, NULL, tld);
+    }
+    else {
+      // otherwise, push it on the visited list so it does not get looked at again too quickly
+      mi_abandoned_visited_push(segment);
+    }
+  }
+  return NULL;
+}
+
+
+/* -----------------------------------------------------------
+   Reclaim or allocate
+----------------------------------------------------------- */
+
+static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
+{
+  mi_assert_internal(page_kind <= MI_PAGE_LARGE);
+  mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
+  // 1.
try to get a segment from our cache
+  mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld);
+  if (segment != NULL) {
+    mi_segment_init(segment, 0, page_kind, page_shift, tld, os_tld);
+    return segment;
+  }
+  // 2. try to reclaim an abandoned segment
+  bool reclaimed;
+  segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld);
+  if (reclaimed) {
+    // reclaimed the right page directly into the heap
+    mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE);
+    return NULL; // pretend out-of-memory as the page will be in the page queue of the heap with available blocks
+  }
+  else if (segment != NULL) {
+    // reclaimed a segment with empty pages (of `page_kind`) in it
+    return segment;
+  }
+  // 3. otherwise allocate a fresh segment
+  return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld);
+}
 
 
 @@ -660,51 +1199,67 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
    Small page allocation
 ----------------------------------------------------------- */
 
-// Allocate a small page inside a segment.
-// Requires that the page has free pages
+static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
+  mi_assert_internal(mi_segment_has_free(segment));
+  mi_assert_expensive(mi_segment_is_valid(segment, tld));
+  for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search?
+    mi_page_t* page = &segment->pages[i];
+    if (!page->segment_in_use) {
+      mi_segment_page_claim(segment, page, tld);
+      return page;
+    }
+  }
+  mi_assert(false);
+  return NULL;
+}
+
+// Allocate a page inside a segment. Requires that the page has free pages
 static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
   mi_assert_internal(mi_segment_has_free(segment));
-  mi_page_t* page = mi_segment_find_free(segment, tld->stats);
-  page->segment_in_use = true;
-  segment->used++;
-  mi_assert_internal(segment->used <= segment->capacity);
-  if (segment->used == segment->capacity) {
-    // if no more free pages, remove from the queue
-    mi_assert_internal(!mi_segment_has_free(segment));
-    mi_segment_remove_from_free_queue(segment,tld);
+  return mi_segment_find_free(segment, tld);
+}
+
+static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
+  // find an available segment in the segment free queue
+  mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld);
+  if (mi_segment_queue_is_empty(free_queue)) {
+    // possibly allocate or reclaim a fresh segment
+    mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld);
+    if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed)
+    mi_assert_internal(free_queue->first == segment);
+    mi_assert_internal(segment->page_kind==kind);
+    mi_assert_internal(segment->used < segment->capacity);
   }
+  mi_assert_internal(free_queue->first != NULL);
+  mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld);
+  mi_assert_internal(page != NULL);
+#if MI_DEBUG>=2
+  // verify it is committed
+  _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0;
+#endif
   return page;
 }
 
-static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
-  mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld);
-  if
(mi_segment_queue_is_empty(free_queue)) { - mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); - if (segment == NULL) return NULL; - mi_segment_enqueue(free_queue, segment); - } - mi_assert_internal(free_queue->first != NULL); - return mi_segment_page_alloc_in(free_queue->first,tld); +static mi_page_t* mi_segment_small_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); } -static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_SMALL,MI_SMALL_PAGE_SHIFT,tld,os_tld); -} - -static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_page_alloc(MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); +static mi_page_t* mi_segment_medium_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + return mi_segment_page_alloc(heap, block_size, MI_PAGE_MEDIUM, MI_MEDIUM_PAGE_SHIFT, tld, os_tld); } /* ----------------------------------------------------------- large page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); +static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_segment_t* segment = mi_segment_reclaim_or_alloc(heap,block_size,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - segment->used = 1; - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); +#if MI_DEBUG>=2 + _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; +#endif return page; } @@ -712,32 +1267,34 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld { mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); if (segment == NULL) return NULL; - mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); - segment->used = 1; + mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= size); segment->thread_id = 0; // huge pages are immediately abandoned - mi_page_t* page = &segment->pages[0]; - page->segment_in_use = true; + mi_page_t* page = mi_segment_find_free(segment, tld); + mi_assert_internal(page != NULL); return page; } /* ----------------------------------------------------------- - Page allocation and free + Page allocation ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { - page = mi_segment_small_page_alloc(tld,os_tld); + page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { - page = mi_segment_medium_page_alloc(tld, os_tld); + page = mi_segment_medium_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { - page = mi_segment_large_page_alloc(tld, os_tld); + page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { page = mi_segment_huge_page_alloc(block_size,tld,os_tld); } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } diff --git a/src/static.c b/src/static.c index f1656fa9..ec9370eb 100644 --- a/src/static.c +++ b/src/static.c @@ -14,8 +14,10 @@ terms of the MIT license. A copy of the license can be found in the file // it will override all the standard library allocation // functions (on Unix's). #include "stats.c" +#include "random.c" #include "os.c" -#include "memory.c" +#include "arena.c" +#include "region.c" #include "segment.c" #include "page.c" #include "heap.c" diff --git a/src/stats.c b/src/stats.c index 50bd029d..a1404502 100644 --- a/src/stats.c +++ b/src/stats.c @@ -26,13 +26,13 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (mi_is_in_main(stat)) { // add atomically (for abandoned pages) - mi_atomic_add64(&stat->current,amount); + mi_atomic_addi64(&stat->current,amount); if (stat->current > stat->peak) stat->peak = stat->current; // racing.. 
it's ok if (amount > 0) { - mi_atomic_add64(&stat->allocated,amount); + mi_atomic_addi64(&stat->allocated,amount); } else { - mi_atomic_add64(&stat->freed, -amount); + mi_atomic_addi64(&stat->freed, -amount); } } else { @@ -50,8 +50,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { - mi_atomic_add64( &stat->count, 1 ); - mi_atomic_add64( &stat->total, (int64_t)amount ); + mi_atomic_addi64( &stat->count, 1 ); + mi_atomic_addi64( &stat->total, (int64_t)amount ); } else { stat->count++; @@ -70,17 +70,17 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add64( &stat->allocated, src->allocated * unit); - mi_atomic_add64( &stat->current, src->current * unit); - mi_atomic_add64( &stat->freed, src->freed * unit); + mi_atomic_addi64( &stat->allocated, src->allocated * unit); + mi_atomic_addi64( &stat->current, src->current * unit); + mi_atomic_addi64( &stat->freed, src->freed * unit); // peak scores do not work across threads.. - mi_atomic_add64( &stat->peak, src->peak * unit); + mi_atomic_addi64( &stat->peak, src->peak * unit); } static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src, int64_t unit) { if (stat==src) return; - mi_atomic_add64( &stat->total, src->total * unit); - mi_atomic_add64( &stat->count, src->count * unit); + mi_atomic_addi64( &stat->total, src->total * unit); + mi_atomic_addi64( &stat->count, src->count * unit); } // must be thread safe as it is called from stats_merge @@ -126,90 +126,96 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { // unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary -static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { +static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { char buf[32]; int len = 32; const char* suffix = (unit <= 0 ? " " : "b"); - double base = (unit == 0 ? 1000.0 : 1024.0); + const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; - double pos = (double)(n < 0 ? -n : n); - if (pos < base) - snprintf(buf,len, "%d %s ", (int)n, suffix); - else if (pos < base*base) - snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); - else if (pos < base*base*base) - snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); - else - snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); - - _mi_fprintf(out, (fmt==NULL ? 
"%11s" : fmt), buf); -} - - -static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { - mi_printf_amount(n,unit,out,NULL); -} - -static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { - if (unit==1) _mi_fprintf(out,"%11s"," "); - else mi_print_amount(n,0,out); -} - -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { - _mi_fprintf(out,"%10s:", msg); - if (unit>0) { - mi_print_amount(stat->peak, unit, out); - mi_print_amount(stat->allocated, unit, out); - mi_print_amount(stat->freed, unit, out); - mi_print_amount(unit, 1, out); - mi_print_count(stat->allocated, unit, out); - if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); - else - _mi_fprintf(out, " ok\n"); - } - else if (unit<0) { - mi_print_amount(stat->peak, -1, out); - mi_print_amount(stat->allocated, -1, out); - mi_print_amount(stat->freed, -1, out); - if (unit==-1) { - _mi_fprintf(out, "%22s", ""); - } - else { - mi_print_amount(-unit, 1, out); - mi_print_count((stat->allocated / -unit), 0, out); - } - if (stat->allocated > stat->freed) - _mi_fprintf(out, " not all freed!\n"); - else - _mi_fprintf(out, " ok\n"); + const int64_t pos = (n < 0 ? -n : n); + if (pos < base) { + snprintf(buf, len, "%d %s ", (int)n, suffix); } else { - mi_print_amount(stat->peak, 1, out); - mi_print_amount(stat->allocated, 1, out); - _mi_fprintf(out, "\n"); + int64_t divider = base; + const char* magnitude = "k"; + if (pos >= divider*base) { divider *= base; magnitude = "m"; } + if (pos >= divider*base) { divider *= base; magnitude = "g"; } + const int64_t tens = (n / (divider/10)); + const long whole = (long)(tens/10); + const long frac1 = (long)(tens%10); + snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix); + } + _mi_fprintf(out, arg, (fmt==NULL ? 
"%11s" : fmt), buf); +} + + +static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + mi_printf_amount(n,unit,out,arg,NULL); +} + +static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { + if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + else mi_print_amount(n,0,out,arg); +} + +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg,"%10s:", msg); + if (unit>0) { + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->allocated, unit, out, arg); + mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->allocated, unit, out, arg); + if (stat->allocated > stat->freed) + _mi_fprintf(out, arg, " not all freed!\n"); + else + _mi_fprintf(out, arg, " ok\n"); + } + else if (unit<0) { + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->allocated, -1, out, arg); + mi_print_amount(stat->freed, -1, out, arg); + if (unit==-1) { + _mi_fprintf(out, arg, "%22s", ""); + } + else { + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->allocated / -unit), 0, out, arg); + } + if (stat->allocated > stat->freed) + _mi_fprintf(out, arg, " not all freed!\n"); + else + _mi_fprintf(out, arg, " ok\n"); + } + else { + mi_print_amount(stat->peak, 1, out, arg); + mi_print_amount(stat->allocated, 1, out, arg); + _mi_fprintf(out, arg, "\n"); } } -static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { - _mi_fprintf(out, "%10s:", msg); - mi_print_amount(stat->total, -1, out); - _mi_fprintf(out, "\n"); +static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->total, -1, out, arg); + _mi_fprintf(out, arg, "\n"); } -static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { + const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count));
+  const long avg_whole = (long)(avg_tens/10);
+  const long avg_frac1 = (long)(avg_tens%10);
+  _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1);
 }
 
-static void mi_print_header(mi_output_fun* out ) {
-  _mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count ");
+static void mi_print_header(mi_output_fun* out, void* arg ) {
+  _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count ");
 }
 
 #if MI_STAT>1
-static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) {
+static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) {
   bool found = false;
   char buf[64];
   for (size_t i = 0; i <= max; i++) {
@@ -218,75 +224,73 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin
       int64_t unit = _mi_bin_size((uint8_t)i);
       snprintf(buf, 64, "%s %3zu", fmt, i);
       mi_stat_add(all, &bins[i], unit);
-      mi_stat_print(&bins[i], buf, unit, out);
+      mi_stat_print(&bins[i], buf, unit, out, arg);
     }
   }
   //snprintf(buf, 64, "%s all", fmt);
   //mi_stat_print(all, buf, 1);
   if (found) {
-    _mi_fprintf(out, "\n");
-    mi_print_header(out);
+    _mi_fprintf(out, arg, "\n");
+    mi_print_header(out, arg);
   }
 }
 #endif
 
-static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit);
+static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit);
 
-static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept {
-  mi_print_header(out);
+static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out, void* arg) mi_attr_noexcept {
+  mi_print_header(out,arg);
 #if MI_STAT>1
   mi_stat_count_t normal = { 0,0,0,0 };
-  mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out);
-  mi_stat_print(&normal, "normal", 1, out);
-  mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out);
-  mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out);
+  mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out,arg);
+  mi_stat_print(&normal, "normal", 1, out, arg);
+  mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
+  mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg);
   mi_stat_count_t total = { 0,0,0,0 };
   mi_stat_add(&total, &normal, 1);
   mi_stat_add(&total, &stats->huge, 1);
   mi_stat_add(&total, &stats->giant, 1);
-  mi_stat_print(&total, "total", 1, out);
-  _mi_fprintf(out, "malloc requested:     ");
-  mi_print_amount(stats->malloc.allocated, 1, out);
-  _mi_fprintf(out, "\n\n");
+  mi_stat_print(&total, "total", 1, out, arg);
+  _mi_fprintf(out, arg, "malloc requested:     ");
+  mi_print_amount(stats->malloc.allocated, 1, out, arg);
+  _mi_fprintf(out, arg, "\n\n");
 #endif
-  mi_stat_print(&stats->reserved, "reserved", 1, out);
-  mi_stat_print(&stats->committed, "committed", 1, out);
-  mi_stat_print(&stats->reset, "reset", 1, out);
-  mi_stat_print(&stats->page_committed, "touched", 1, out);
-  mi_stat_print(&stats->segments, "segments", -1, out);
-  mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out);
-  mi_stat_print(&stats->segments_cache, "-cached", -1, out);
-  mi_stat_print(&stats->pages, "pages", -1, out);
-  mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out);
-  mi_stat_counter_print(&stats->pages_extended, "-extended", out);
-  mi_stat_counter_print(&stats->page_no_retire, "-noretire", out);
-  mi_stat_counter_print(&stats->mmap_calls, "mmaps", out);
-  mi_stat_counter_print(&stats->commit_calls, "commits", out);
-  mi_stat_print(&stats->threads, "threads", -1, out);
-  mi_stat_counter_print_avg(&stats->searches, "searches", out);
+  mi_stat_print(&stats->reserved, "reserved", 1, out, arg);
+  mi_stat_print(&stats->committed, "committed", 1, out, arg);
+  mi_stat_print(&stats->reset, "reset", 1, out, arg);
+  mi_stat_print(&stats->page_committed, "touched", 1, out, arg);
+  mi_stat_print(&stats->segments, "segments", -1, out, arg);
+  mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
+  mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
+  mi_stat_print(&stats->pages, "pages", -1, out, arg);
+  mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
+  mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
+  mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg);
+  mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
+  mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
+  mi_stat_print(&stats->threads, "threads", -1, out, arg);
+  mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
+  _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count());
+  if (elapsed > 0) _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
 
-  if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs);
-
-  double user_time;
-  double sys_time;
+  mi_msecs_t user_time;
+  mi_msecs_t sys_time;
   size_t peak_rss;
   size_t page_faults;
   size_t page_reclaim;
   size_t peak_commit;
   mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit);
-  _mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim );
-  mi_printf_amount((int64_t)peak_rss, 1, out, "%s");
+  _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim );
+  mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s");
   if (peak_commit > 0) {
-    _mi_fprintf(out,", commit charge: ");
-    mi_printf_amount((int64_t)peak_commit, 1, out, "%s");
+    _mi_fprintf(out, arg, ", commit charge: ");
+    mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s");
   }
-  _mi_fprintf(out,"\n");
+  _mi_fprintf(out, arg, "\n");
 }
 
-double _mi_clock_end(double start);
-double _mi_clock_start(void);
-static double mi_time_start = 0.0;
+static mi_msecs_t mi_time_start; // = 0
 
 static mi_stats_t* mi_stats_get_default(void) {
   mi_heap_t* heap = mi_heap_get_default();
@@ -315,72 +319,73 @@ void _mi_stats_done(mi_stats_t* stats) {  // called from `mi_thread_done`
   mi_stats_merge_from(stats);
 }
-
-static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) {
-  mi_stats_merge_from(stats);
-  _mi_stats_print(&_mi_stats_main, secs, out);
+void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
+  mi_msecs_t elapsed = _mi_clock_end(mi_time_start);
+  mi_stats_merge_from(mi_stats_get_default());
+  _mi_stats_print(&_mi_stats_main, elapsed, out, arg);
 }
 
-void mi_stats_print(mi_output_fun* out) mi_attr_noexcept {
-  mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out);
+void mi_stats_print(void* out) mi_attr_noexcept {
+  // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`)
+  mi_stats_print_out((mi_output_fun*)out, NULL);
 }
 
-void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept {
-  _mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out);
+void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
+  mi_msecs_t elapsed = _mi_clock_end(mi_time_start);
+  _mi_stats_print(mi_stats_get_default(), elapsed, out, arg);
 }
 
-
-// --------------------------------------------------------
-// Basic timer for convenience
-// --------------------------------------------------------
-
+// ----------------------------------------------------------------
+// Basic timer for convenience; use milli-seconds to avoid doubles
+// ----------------------------------------------------------------
 #ifdef _WIN32
 #include <windows.h>
-static double mi_to_seconds(LARGE_INTEGER t) {
-  static double freq = 0.0;
-  if (freq <= 0.0) {
+static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
+  static LARGE_INTEGER mfreq; // = 0
+  if (mfreq.QuadPart == 0LL) {
     LARGE_INTEGER f;
    QueryPerformanceFrequency(&f);
-    freq = (double)(f.QuadPart);
+    mfreq.QuadPart = f.QuadPart/1000LL;
+    if (mfreq.QuadPart == 0) mfreq.QuadPart = 1;
   }
-  return ((double)(t.QuadPart) / freq);
+  return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
 }
 
-static double mi_clock_now(void) {
+mi_msecs_t _mi_clock_now(void) {
   LARGE_INTEGER t;
   QueryPerformanceCounter(&t);
-  return mi_to_seconds(t);
+  return mi_to_msecs(t);
 }
 #else
 #include <time.h>
 #ifdef CLOCK_REALTIME
-static double mi_clock_now(void) {
+mi_msecs_t _mi_clock_now(void) {
   struct timespec t;
   clock_gettime(CLOCK_REALTIME, &t);
-  return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec);
+  return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
 }
 #else
 // low resolution timer
-static double mi_clock_now(void) {
-  return ((double)clock() / (double)CLOCKS_PER_SEC);
+mi_msecs_t _mi_clock_now(void) {
  return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000));
 }
 #endif
 #endif
 
-static double mi_clock_diff = 0.0;
+static mi_msecs_t mi_clock_diff;
 
-double _mi_clock_start(void) {
+mi_msecs_t _mi_clock_start(void) {
   if (mi_clock_diff == 0.0) {
-    double t0 = mi_clock_now();
-    mi_clock_diff = mi_clock_now() - t0;
+    mi_msecs_t t0 = _mi_clock_now();
+    mi_clock_diff = _mi_clock_now() - t0;
   }
-  return mi_clock_now();
+  return _mi_clock_now();
 }
 
-double _mi_clock_end(double start) {
-  double end = mi_clock_now();
+mi_msecs_t _mi_clock_end(mi_msecs_t start) {
+  mi_msecs_t end = _mi_clock_now();
   return (end - start - mi_clock_diff);
 }
 
@@ -394,21 +399,21 @@ double _mi_clock_end(double start) {
 
 #include <psapi.h>
 #pragma comment(lib,"psapi.lib")
 
-static double filetime_secs(const FILETIME* ftime) {
+static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
   ULARGE_INTEGER i;
   i.LowPart = ftime->dwLowDateTime;
   i.HighPart = ftime->dwHighDateTime;
-  double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds
-  return secs;
+  mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds
+  return msecs;
 }
 
-static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
+static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
   FILETIME ct;
   FILETIME ut;
   FILETIME st;
   FILETIME et;
   GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
-  *utime = filetime_secs(&ut);
-  *stime = filetime_secs(&st);
+  *utime = filetime_msecs(&ut);
+  *stime = filetime_msecs(&st);
 
   PROCESS_MEMORY_COUNTERS info;
   GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
@@ -427,11 +432,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size
 #include <mach/mach.h>
 #endif
 
-static double timeval_secs(const struct timeval* tv) {
-  return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6);
+static mi_msecs_t timeval_secs(const struct timeval* tv) {
+  return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
 }
 
-static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
+static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
   struct rusage rusage;
   getrusage(RUSAGE_SELF, &rusage);
 #if defined(__APPLE__) && defined(__MACH__)
@@ -452,12 +457,12 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size
 #pragma message("define a way to get process info")
 #endif
 
-static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
+static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
   *peak_rss = 0;
   *page_faults = 0;
   *page_reclaim = 0;
   *peak_commit = 0;
-  *utime = 0.0;
-  *stime = 0.0;
+  *utime = 0;
+  *stime = 0;
 }
 #endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a80dde58..ce077d14 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE)
 endif()
 
 # Import mimalloc (if installed)
-find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
+find_package(mimalloc 1.5 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
 message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}")
 
 # overriding with a dynamic library
diff --git a/test/main-override-static.c b/test/main-override-static.c
index b04bfeef..54a5ea66 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -13,7 +13,7 @@ static void corrupt_free();
 
 int main() {
   mi_version();
-  
+
   // detect double frees and heap corruption
   // double_free1();
   // double_free2();
@@ -106,4 +106,4 @@ static void corrupt_free() {
   for (int i = 0; i < 4096; i++) {
     malloc(SZ);
   }
-}
\ No newline at end of file
+}
diff --git a/test/main-override.cpp b/test/main-override.cpp
index e006ad27..fcf3970f 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -6,6 +6,7 @@
 #include
 #include
+#include <vector>
 
 static void* p = malloc(8);
 
@@ -24,7 +25,7 @@ public:
 
 int main() {
-  //mi_stats_reset();  // ignore earlier allocations
+  mi_stats_reset();  // ignore earlier allocations
   atexit(free_p);
   void* p1 = malloc(78);
   void* p2 = mi_malloc_aligned(16,24);
@@ -69,3 +70,18 @@ public:
 
 static Static s = Static();
 
+bool test_stl_allocator1() {
+  std::vector<int, mi_stl_allocator<int> > vec;
+  vec.push_back(1);
+  vec.pop_back();
+  return vec.size() == 0;
+}
+
+struct some_struct { int i; int j; double z; };
+
+bool test_stl_allocator2() {
+  std::vector<some_struct, mi_stl_allocator<some_struct> > vec;
+  vec.push_back(some_struct());
+  vec.pop_back();
+  return vec.size() == 0;
+}
\ No newline at end of file
diff --git a/test/test-api.c b/test/test-api.c
index bd2291da..95891754 100644
--- a/test/test-api.c
+++ b/test/test-api.c
@@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file
 Testing allocators is difficult as bugs may only surface after particular
 allocation patterns. The main approach to testing _mimalloc_ is therefore
 to have extensive internal invariant checking (see `page_is_valid` in `page.c`
-for example), which is enabled in debug mode with `-DMI_CHECK_FULL=ON`.
+for example), which is enabled in debug mode with `-DMI_DEBUG_FULL=ON`.
 The main testing is then to run `mimalloc-bench` [1] using full invariant checking
 to catch any potential problems over a wide range of intensive allocation
 benchmarks.
@@ -25,6 +25,11 @@ we therefore test the API over various inputs. Please add more tests :-)
 #include
 #include
 #include
+
+#ifdef __cplusplus
+#include <vector>
+#endif
+
 #include "mimalloc.h"
 #include "mimalloc-internal.h"
 
@@ -61,6 +66,8 @@ static int failed = 0;
 // ---------------------------------------------------------------------------
 bool test_heap1();
 bool test_heap2();
+bool test_stl_allocator1();
+bool test_stl_allocator2();
 
 // ---------------------------------------------------------------------------
 // Main testing
@@ -81,6 +88,13 @@ int main() {
   CHECK_BODY("malloc-null",{
     mi_free(NULL);
   });
+  CHECK_BODY("calloc-overflow",{
+    // use (size_t)&mi_calloc to get some number without triggering compiler warnings
+    result = (mi_calloc((size_t)&mi_calloc,SIZE_MAX/1000) == NULL);
+  });
+  CHECK_BODY("calloc0",{
+    result = (mi_usable_size(mi_calloc(0,1000)) <= 16);
+  });
 
   // ---------------------------------------------------
   // Extended
@@ -150,6 +164,9 @@ int main() {
     mi_free(s);
   });
 
+  CHECK("stl_allocator1", test_stl_allocator1());
+  CHECK("stl_allocator2", test_stl_allocator2());
+
   // ---------------------------------------------------
   // Done
   // ---------------------------------------------------
@@ -182,3 +199,27 @@ bool test_heap2() {
   mi_free(p2);
   return true;
 }
+
+bool test_stl_allocator1() {
+#ifdef __cplusplus
+  std::vector<int, mi_stl_allocator<int> > vec;
+  vec.push_back(1);
+  vec.pop_back();
+  return vec.size() == 0;
+#else
+  return true;
+#endif
+}
+
+struct some_struct { int i; int j; double z; };
+
+bool test_stl_allocator2() {
+#ifdef __cplusplus
+  std::vector<some_struct, mi_stl_allocator<some_struct> > vec;
+  vec.push_back(some_struct());
+  vec.pop_back();
+  return vec.size() == 0;
+#else
+  return true;
+#endif
+}
diff --git a/test/test-stress.c b/test/test-stress.c
index 6b2fb8c4..1b559a59 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -5,9 +5,14 @@ terms of the MIT license.
 -----------------------------------------------------------------------------*/
 
 /* This is a stress test for the allocator, using multiple threads and
-   transferring objects between threads. This is not a typical workload
-   but uses a random linear size distribution. Timing can also depend on
-   (random) thread scheduling. Do not use this test as a benchmark!
+   transferring objects between threads. It tries to reflect real-world workloads:
+   - allocation size is distributed linearly in powers of two
+   - with some fraction extra large (and some extra extra large)
+   - the allocations are initialized and read again at free
+   - pointers transfer between threads
+   - threads are terminated and recreated with some objects surviving in between
+   - uses deterministic "randomness", but execution can still depend on
+     (random) thread scheduling. Do not use this test as a benchmark!
 */
 
 #include
@@ -21,22 +26,24 @@ terms of the MIT license.
 //
 // argument defaults
 static int THREADS = 32;      // more repeatable if THREADS <= #processors
-static int SCALE = 50;        // scaling factor
-static int ITER = 10;         // N full iterations re-creating all threads
+static int SCALE = 10;        // scaling factor
+static int ITER = 50;         // N full iterations destructing and re-creating all threads
 
 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
 // static int SCALE = 100;    // scaling factor
 
+#define STRESS                // undefine for leak test
+
 static bool allow_large_objects = true;    // allow very large objects?
-static size_t use_one_size = 0;            // use single object size of N uintptr_t?
+static size_t use_one_size = 0;            // use single object size of `N * sizeof(uintptr_t)`?
 
 #ifdef USE_STD_MALLOC
-#define custom_malloc(s)    malloc(s)
+#define custom_calloc(n,s)  calloc(n,s)
 #define custom_realloc(p,s) realloc(p,s)
 #define custom_free(p)      free(p)
 #else
-#define custom_malloc(s)    mi_malloc(s)
+#define custom_calloc(n,s)  mi_calloc(n,s)
 #define custom_realloc(p,s) mi_realloc(p,s)
 #define custom_free(p)      mi_free(p)
 #endif
@@ -89,9 +96,12 @@ static void* alloc_items(size_t items, random_t r) {
   }
   if (items == 40) items++;              // pthreads uses that size for stack increases
   if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
-  uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t));
+  if (items==0) items = 1;
+  uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
   if (p != NULL) {
-    for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
+    for (uintptr_t i = 0; i < items; i++) {
+      p[i] = (items - i) ^ cookie;
+    }
   }
   return p;
 }
@@ -113,15 +123,15 @@ static void free_items(void* p) {
 
 static void stress(intptr_t tid) {
   //bench_start_thread();
-  uintptr_t r = tid * 43;
-  const size_t max_item_shift = 5; // 128
+  uintptr_t r = (tid * 43); // rand();
+  const size_t max_item_shift = 5;          // 128
   const size_t max_item_retained_shift = max_item_shift + 2;
   size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
   size_t retain = allocs / 2;
   void** data = NULL;
   size_t data_size = 0;
   size_t data_top = 0;
-  void** retained = (void**)custom_malloc(retain * sizeof(void*));
+  void** retained = (void**)custom_calloc(retain,sizeof(void*));
   size_t retain_top = 0;
 
   while (allocs > 0 || retain > 0) {
@@ -132,7 +142,7 @@
       data_size += 100000;
       data = (void**)custom_realloc(data, data_size * sizeof(void*));
     }
-    data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
+    data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
   }
   else {
     // 25% retain
@@ -166,7 +176,46 @@ static void stress(intptr_t tid) {
   //bench_end_thread();
 }
 
-static void run_os_threads(size_t nthreads);
+static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
+
+static void test_stress(void) {
+  uintptr_t r = rand();
+  for (int n = 0; n < ITER; n++) {
+    run_os_threads(THREADS, &stress);
+    for (int i = 0; i < TRANSFERS; i++) {
+      if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
+        void* p = atomic_exchange_ptr(&transfer[i], NULL);
+        free_items(p);
+      }
+    }
+    mi_collect(false);
+#ifndef NDEBUG
+    if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
+#endif
+  }
+}
+
+#ifndef STRESS
+static void leak(intptr_t tid) {
+  uintptr_t r = rand();
+  void* p = alloc_items(1 /*pick(&r)%128*/, &r);
+  if (chance(50, &r)) {
+    intptr_t i = (pick(&r) % TRANSFERS);
+    void* q = atomic_exchange_ptr(&transfer[i], p);
+    free_items(q);
+  }
+}
+
+static void test_leak(void) {
+  for (int n = 0; n < ITER; n++) {
+    run_os_threads(THREADS, &leak);
+    mi_collect(false);
+#ifndef NDEBUG
+    if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
+#endif
+  }
+}
+#endif
 
 int main(int argc, char** argv) {
   // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
@@ -185,28 +234,20 @@ int main(int argc, char** argv) {
     long n = (strtol(argv[3], &end, 10));
     if (n > 0) ITER = n;
   }
-  printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
+  printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
   //int res = mi_reserve_huge_os_pages(4,1);
   //printf("(reserve huge: %i\n)", res);
 
   //bench_start_program();
 
   // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
+  srand(0x7feb352d);
   mi_stats_reset();
-  uintptr_t r = 43 * 43;
-  for (int n = 0; n < ITER; n++) {
-    run_os_threads(THREADS);
-    for (int i = 0; i < TRANSFERS; i++) {
-      if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
-        void* p = atomic_exchange_ptr(&transfer[i], NULL);
-        free_items(p);
-      }
-    }
-    mi_collect(false);
-#ifndef NDEBUG
-    if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); }
-#endif
-  }
+#ifdef STRESS
+  test_stress();
+#else
+  test_leak();
+#endif
 
   mi_collect(true);
   mi_stats_print(NULL);
@@ -215,18 +256,21 @@
 }
 
+static void (*thread_entry_fun)(intptr_t) = &stress;
+
 #ifdef _WIN32
 
 #include <windows.h>
 
-static DWORD WINAPI thread_entry(LPVOID param) {
-  stress((intptr_t)param);
+static DWORD WINAPI thread_entry(LPVOID param) {
+  thread_entry_fun((intptr_t)param);
   return 0;
 }
 
-static void run_os_threads(size_t nthreads) {
-  DWORD* tids = (DWORD*)custom_malloc(nthreads * sizeof(DWORD));
-  HANDLE* thandles = (HANDLE*)custom_malloc(nthreads * sizeof(HANDLE));
+static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
+  thread_entry_fun = fun;
+  DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
+  HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
   for (uintptr_t i = 0; i < nthreads; i++) {
     thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]);
   }
@@ -241,7 +285,7 @@
 }
 
 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
-#if (INTPTR_MAX == UINT32_MAX)
+#if (INTPTR_MAX == INT32_MAX)
   return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
 #else
   return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
@@ -250,15 +294,15 @@
 
 #else
 
 #include <pthread.h>
-#include <stdatomic.h>
 
 static void* thread_entry(void* param) {
-  stress((uintptr_t)param);
+  thread_entry_fun((uintptr_t)param);
   return NULL;
 }
 
-static void run_os_threads(size_t nthreads) {
-  pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t));
+static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
+  thread_entry_fun = fun;
+  pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
   memset(threads, 0, sizeof(pthread_t) * nthreads);
   //pthread_setconcurrency(nthreads);
   for (uintptr_t i = 0; i < nthreads; i++) {
@@ -270,8 +314,16 @@
   custom_free(threads);
 }
 
+#ifdef __cplusplus
+#include <atomic>
 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
-  return atomic_exchange_explicit((volatile _Atomic(void*)*)p, newval, memory_order_acquire);
+  return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
 }
+#else
+#include <stdatomic.h>
+static void* atomic_exchange_ptr(volatile void** p, void* newval) {
+  return atomic_exchange((volatile _Atomic(void*)*)p, newval);
+}
+#endif
 
 #endif
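
The statistics changes above replace `double` seconds with integer `mi_msecs_t` milliseconds and thread a `void* arg` from every print entry point through to the `mi_output_fun` callback. Below is a minimal sketch of how a caller can use the new pass-through argument, assuming the updated callback shape `void (const char* msg, void* arg)` implied by the `_mi_fprintf(out, arg, ...)` calls and the `mi_stats_print_out` entry point introduced in this diff; the buffer type `stat_buf_t` and the function `buf_out` are illustrative names, not part of mimalloc.

// Sketch: capture mimalloc statistics into a caller-owned buffer via the
// `arg` pass-through added in this change (hypothetical helper names).
#include <stdio.h>
#include <string.h>
#include <mimalloc.h>

typedef struct {
  char   buf[16384];
  size_t used;
} stat_buf_t;

// Assumed callback shape: the `arg` given to mi_stats_print_out is handed
// back verbatim with every chunk of output text.
static void buf_out(const char* msg, void* arg) {
  stat_buf_t* sb = (stat_buf_t*)arg;
  size_t n = strlen(msg);
  if (sb->used + n + 1 <= sizeof(sb->buf)) {
    memcpy(sb->buf + sb->used, msg, n + 1);  // copy including the terminating NUL
    sb->used += n;
  }
}

int main(void) {
  void* p = mi_malloc(100);
  mi_free(p);
  stat_buf_t sb = { {0}, 0 };
  mi_stats_print_out(&buf_out, &sb);  // new two-argument entry point
  fputs(sb.buf, stdout);
  return 0;
}

Routing output through a caller-owned buffer this way needs no global state, which is what the extra `arg` parameter enables; the old zero-state style remains available through the compatibility wrapper `mi_stats_print`, whose `out` parameter may now simply be `stdout` or `stderr`.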