add initial primitive api for locks

This commit is contained in:
Daan Leijen 2024-06-01 16:45:20 -07:00
parent d9aa19a763
commit 0b3cd51249
11 changed files with 208 additions and 48 deletions

View File

@ -8,6 +8,17 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
// include windows.h or pthreads.h
#if defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#elif !defined(_WIN32) && (defined(__EMSCRIPTEN_SHARED_MEMORY__) || !defined(__wasi__))
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
@ -133,10 +144,6 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
#elif defined(_MSC_VER)
// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
@ -306,7 +313,7 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t;
// Returns true only on the first invocation
static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1
}
@ -329,10 +336,6 @@ static inline void mi_atomic_yield(void) {
std::this_thread::yield();
}
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline void mi_atomic_yield(void) {
YieldProcessor();
}

View File

@ -53,11 +53,6 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_externc
#endif
// pthreads
#if !defined(_WIN32) && !defined(__wasi__)
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
// "options.c"
void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);

View File

@ -114,6 +114,24 @@ void _mi_prim_thread_done_auto_done(void);
// Called when the default heap for a thread changes
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// Locks are only used if abandoned segment visiting is permitted
#if defined(_WIN32)
#define mi_lock_t CRITICAL_SECTION
#elif defined(MI_USE_PTHREADS)
#define mi_lock_t pthread_mutex_t
#else
#define mi_lock_t _Atomic(uintptr_t)
#endif
// Take a lock (blocking). Return `true` on success.
bool _mi_prim_lock(mi_lock_t* lock);
// Try to take lock and return `true` if successful.
bool _mi_prim_try_lock(mi_lock_t* lock);
// Release a lock.
void _mi_prim_unlock(mi_lock_t* lock);
//-------------------------------------------------------------------
// Thread id: `_mi_prim_thread_id()`
@ -235,10 +253,6 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#elif defined(_WIN32)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
@ -370,4 +384,6 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) {
#endif // MIMALLOC_PRIM_H

View File

@ -34,7 +34,7 @@ The corresponding `mi_track_free` still uses the block start pointer and origina
The `mi_track_resize` is currently unused but could be called on reallocations within a block.
`mi_track_init` is called at program start.
The following macros are for tools like asan and valgrind to track whether memory is
The following macros are for tools like asan and valgrind to track whether memory is
defined, undefined, or not accessible at all:
#define mi_track_mem_defined(p,size)
@ -82,10 +82,6 @@ defined, undefined, or not accessible at all:
#define MI_TRACK_HEAP_DESTROY 1
#define MI_TRACK_TOOL "ETW"
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include "../src/prim/windows/etw.h"
#define mi_track_init() EventRegistermicrosoft_windows_mimalloc();
@ -96,7 +92,7 @@ defined, undefined, or not accessible at all:
// no tracking
#define MI_TRACK_ENABLED 0
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_TOOL "none"
#define mi_track_malloc_size(p,reqsize,size,zero)

View File

@ -28,7 +28,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
{
mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size);
mi_block_t* const block = page->free;
@ -61,7 +61,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
}
else {
_mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE);
}
}
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN
@ -123,9 +123,9 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
#if (MI_PADDING)
if (size == 0) { size = sizeof(void*); }
#endif
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero);
mi_track_malloc(p,size,zero);
#if MI_STAT>1
@ -362,7 +362,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_
#ifndef PATH_MAX
#define PATH_MAX MAX_PATH
#endif
#include <windows.h>
mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
// todo: use GetFullPathNameW to allow longer file names
char buf[PATH_MAX];

View File

@ -200,7 +200,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
// Thread init/done
//----------------------------------------------------------------
#ifdef __EMSCRIPTEN_SHARED_MEMORY__
#if defined(MI_USE_PTHREADS)
// use pthread local storage keys to detect thread ending
// (and used with MI_TLS_PTHREADS for the default heap)
@ -242,3 +242,50 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
}
#endif
//----------------------------------------------------------------
// Locks
//----------------------------------------------------------------
#if defined(MI_USE_PTHREADS)
bool _mi_prim_lock(mi_lock_t* lock) {
return (pthread_mutex_lock(lock) == 0);
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
void _mi_prim_unlock(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
}
#else
#include <emscripten.h>
// fall back to poor man's locks.
bool _mi_prim_lock(mi_lock_t* lock) {
for(int i = 0; i < 1000; i++) { // for at most 1 second?
if (_mi_prim_try_lock(lock)) return true;
if (i < 25) {
mi_atomic_yield(); // first yield a bit
}
else {
emscripten_sleep(1); // then sleep for 1ms intervals
}
}
return true;
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock,&expected,(uintptr_t)1);
}
void _mi_prim_unlock(mi_lock_t* lock) {
mi_atomic_store_release(lock,(uintptr_t)0);
}
#endif

View File

@ -880,3 +880,49 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
}
#endif
//----------------------------------------------------------------
// Locks
//----------------------------------------------------------------
#if defined(MI_USE_PTHREADS)
bool _mi_prim_lock(mi_lock_t* lock) {
return (pthread_mutex_lock(lock) == 0);
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
void _mi_prim_unlock(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
}
#else
// fall back to poor man's locks.
bool _mi_prim_lock(mi_lock_t* lock) {
for(int i = 0; i < 1000; i++) { // for at most 1 second?
if (_mi_prim_try_lock(lock)) return true;
if (i < 25) {
mi_atomic_yield(); // first yield a bit
}
else {
usleep(1000); // then sleep for 1ms intervals
}
}
return true;
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock,&expected,(uintptr_t)1);
}
void _mi_prim_unlock(mi_lock_t* lock) {
mi_atomic_store_release(lock,(uintptr_t)0);
}
#endif

View File

@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
void _mi_prim_mem_init( mi_os_mem_config_t* config ) {
config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB
config->alloc_granularity = 16;
config->has_overcommit = false;
config->has_overcommit = false;
config->has_partial_free = false;
config->has_virtual_reserve = false;
}
@ -134,7 +134,7 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la
//---------------------------------------------
int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
MI_UNUSED(addr); MI_UNUSED(size);
MI_UNUSED(addr); MI_UNUSED(size);
*is_zero = false;
return 0;
}
@ -199,9 +199,9 @@ mi_msecs_t _mi_prim_clock_now(void) {
// low resolution timer
mi_msecs_t _mi_prim_clock_now(void) {
#if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0)
return (mi_msecs_t)clock();
return (mi_msecs_t)clock();
#elif (CLOCKS_PER_SEC < 1000)
return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC);
#else
return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000);
#endif
@ -278,3 +278,43 @@ void _mi_prim_thread_done_auto_done(void) {
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
//----------------------------------------------------------------
// Locks
//----------------------------------------------------------------
#if defined(MI_USE_PTHREADS)
bool _mi_prim_lock(mi_lock_t* lock) {
return (pthread_mutex_lock(lock) == 0);
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
void _mi_prim_unlock(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
}
#else
// fall back to poor man's locks.
bool _mi_prim_lock(mi_lock_t* lock) {
for(int i = 0; i < 1000; i++) { // for at most 1 second?
if (_mi_prim_try_lock(lock)) return true;
mi_atomic_yield(); // this should never happen as wasi is single threaded?
}
return true;
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock,&expected,(uintptr_t)1);
}
void _mi_prim_unlock(mi_lock_t* lock) {
mi_atomic_store_release(lock,(uintptr_t)0);
}
#endif

View File

@ -231,7 +231,7 @@ static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignmen
else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) &&
(flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
win_is_out_of_memory_error(GetLastError())) {
// if committing regular memory and being out-of-memory,
// if committing regular memory and being out-of-memory,
// keep trying for a bit in case memory frees up after all. See issue #894
_mi_warning_message("out-of-memory on OS allocation, try again... (attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags);
long sleep_msecs = tries*40; // increasing waits
@ -316,7 +316,7 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
return 0;
}
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
*needs_recommit = true; // for safety, assume always decommitted even in the case of an error.
return (ok ? 0 : (int)GetLastError());
@ -468,7 +468,6 @@ mi_msecs_t _mi_prim_clock_now(void) {
// Process Info
//----------------------------------------------------------------
#include <windows.h>
#include <psapi.h>
static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
@ -491,7 +490,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
pinfo->utime = filetime_msecs(&ut);
pinfo->stime = filetime_msecs(&st);
// load psapi on demand
if (pGetProcessMemoryInfo == NULL) {
HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
@ -505,7 +504,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
memset(&info, 0, sizeof(info));
if (pGetProcessMemoryInfo != NULL) {
pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
}
}
pinfo->current_rss = (size_t)info.WorkingSetSize;
pinfo->peak_rss = (size_t)info.PeakWorkingSetSize;
pinfo->current_commit = (size_t)info.PagefileUsage;
@ -517,7 +516,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo)
// Output
//----------------------------------------------------------------
void _mi_prim_out_stderr( const char* msg )
void _mi_prim_out_stderr( const char* msg )
{
// on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output.
@ -564,6 +563,23 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
}
//----------------------------------------------------------------
// Locks
//----------------------------------------------------------------
bool _mi_prim_lock(mi_lock_t* lock) {
EnterCriticalSection(lock);
return true;
}
bool _mi_prim_try_lock(mi_lock_t* lock) {
return TryEnterCriticalSection(lock);
}
void _mi_prim_unlock(mi_lock_t* lock) {
LeaveCriticalSection(lock);
}
//----------------------------------------------------------------
// Random
@ -600,7 +616,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) {
}
if (pBCryptGenRandom == NULL) return false;
}
return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
#endif // MI_USE_RTLGENRANDOM
@ -636,9 +652,9 @@ void _mi_prim_thread_init_auto_done(void) {
}
void _mi_prim_thread_done_auto_done(void) {
// call thread-done on all threads (except the main thread) to prevent
// call thread-done on all threads (except the main thread) to prevent
// dangling callback pointer if statically linked with a DLL; Issue #208
FlsFree(mi_fls_key);
FlsFree(mi_fls_key);
}
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
@ -661,3 +677,4 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
}
#endif

View File

@ -19,7 +19,7 @@
#endif
#ifdef _WIN32
#include <Windows.h>
#include <windows.h>
static void msleep(unsigned long msecs) { Sleep(msecs); }
#else
#include <unistd.h>
@ -43,7 +43,7 @@ static void test_stl_allocators();
int main() {
// mi_stats_reset(); // ignore earlier allocations
test_std_string();
// heap_thread_free_huge();
/*

View File

@ -200,7 +200,7 @@ static void test_stress(void) {
#ifndef NDEBUG
//mi_collect(false);
//mi_debug_show_arenas();
#endif
#endif
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif
@ -232,7 +232,7 @@ static void test_leak(void) {
int main(int argc, char** argv) {
#ifndef USE_STD_MALLOC
mi_stats_reset();
#endif
#endif
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
if (argc >= 2) {
@ -285,7 +285,7 @@ static void (*thread_entry_fun)(intptr_t) = &stress;
#ifdef _WIN32
#include <Windows.h>
#include <windows.h>
static DWORD WINAPI thread_entry(LPVOID param) {
thread_entry_fun((intptr_t)param);