diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index bb9430b0..b25a0ac3 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -67,8 +67,8 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1) static inline void mi_atomic_yield(void); -static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)* p, intptr_t add); -static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)* p, intptr_t sub); +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add); +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #if defined(__cplusplus) || !defined(_MSC_VER) @@ -137,7 +137,7 @@ typedef enum mi_memory_order_e { mi_memory_order_seq_cst } mi_memory_order; -static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)* p, uintptr_t add, mi_memory_order mo) { +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add); } @@ -145,15 +145,15 @@ static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintp (void)(mo); return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub)); } -static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x); } -static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)* p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { (void)(mo1); (void)(mo2); uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected)); if (read == *expected) { @@ -167,7 +167,7 @@ static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t) static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) { return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); } -static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)* p, uintptr_t exchange, mi_memory_order mo) { +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) { (void)(mo); return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange); } @@ -178,23 +178,44 @@ static inline void mi_atomic_thread_fence(mi_memory_order mo) { } static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { (void)(mo); - #if defined(_M_IX86) || defined(_M_X64) +#if defined(_M_IX86) || defined(_M_X64) return *p; - #else +#else uintptr_t x = *p; if (mo > mi_memory_order_relaxed) { while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; } return x; - #endif +#endif } -static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)* p, uintptr_t x, mi_memory_order mo) { +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) { (void)(mo); - #if defined(_M_IX86) || defined(_M_X64) +#if defined(_M_IX86) || defined(_M_X64) *p = x; - #else - mi_atomic_exchange_explicit(p,x,mo); - #endif +#else + mi_atomic_exchange_explicit(p, x, mo); +#endif +} +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) { + (void)(mo); +#if defined(_M_X64) + return *p; +#else + int64_t old = *p; + int64_t x = old; + while ((old = InterlockedCompareExchange64(p, x, old)) != x) { + x = old; + } + return x; +#endif +} +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) { + (void)(mo); +#if defined(x_M_IX86) || defined(_M_X64) + *p = x; +#else + InterlockedExchange64(p, x); +#endif } static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)* p, mi_memory_order mo) { (void)(mo); @@ -219,10 +240,10 @@ static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)* p, int64_t x, m } // These are used by the statistics -static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int64_t add) { - #ifdef _WIN64 - return (int64_t)mi_atomic_addi((int64_t*)p,add); - #else +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { +#ifdef _WIN64 + return (int64_t)mi_atomic_addi((int64_t*)p, add); +#else int64_t current; int64_t sum; do { @@ -230,7 +251,7 @@ static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)* p, int sum = current + add; } while (_InterlockedCompareExchange64(p, sum, current) != current); return current; - #endif +#endif } static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { int64_t current; @@ -284,13 +305,13 @@ static inline void mi_atomic_yield(void) { #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) #if defined(__x86_64__) || defined(__i386__) - static inline void mi_atomic_yield(void) { - __asm__ volatile ("pause" ::: "memory"); - } +static inline void mi_atomic_yield(void) { + __asm__ volatile ("pause" ::: "memory"); +} #elif defined(__arm__) || defined(__aarch64__) - static inline void mi_atomic_yield(void) { - __asm__ volatile("yield"); - } +static inline void mi_atomic_yield(void) { + __asm__ volatile("yield"); +} #endif #elif defined(__wasi__) #include @@ -305,4 +326,4 @@ static inline void mi_atomic_yield(void) { #endif -#endif // __MIMALLOC_ATOMIC_H +#endif // __MIMALLOC_ATOMIC_H \ No newline at end of file diff --git a/include/mimalloc.h b/include/mimalloc.h index ec579304..908e8f20 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -153,6 +153,7 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_process_info(double* user_time, double* system_time, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; // ------------------------------------------------------------------------------------- // Aligned allocation diff --git a/src/stats.c b/src/stats.c index d1580597..16c2c06d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -276,7 +276,7 @@ static void mi_buffered_out(const char* msg, void* arg) { // Print statistics //------------------------------------------------------------ -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); +static void mi_stat_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered @@ -323,15 +323,17 @@ static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun mi_msecs_t user_time; mi_msecs_t sys_time; + size_t current_rss; size_t peak_rss; - size_t page_faults; - size_t page_reclaim; + size_t current_commit; size_t peak_commit; - mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); - _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim ); + size_t page_faults; + mi_stat_process_info(&user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); + _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { - _mi_fprintf(out, arg, ", commit charge: "); + _mi_fprintf(out, arg, ", commit: "); mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } _mi_fprintf(out, arg, "\n"); @@ -453,7 +455,9 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) { mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds return msecs; } -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { + +static void mi_stat_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ FILETIME ct; FILETIME ut; FILETIME st; @@ -461,13 +465,13 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); *utime = filetime_msecs(&ut); *stime = filetime_msecs(&st); - PROCESS_MEMORY_COUNTERS info; GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *peak_rss = (size_t)info.PeakWorkingSetSize; - *page_faults = (size_t)info.PageFaultCount; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_reclaim = 0; + *current_rss = (size_t)info.WorkingSetSize; + *peak_rss = (size_t)info.PeakWorkingSetSize; + *current_commit = (size_t)info.PagefileUsage; + *peak_commit = (size_t)info.PeakPagefileUsage; + *page_faults = (size_t)info.PageFaultCount; } #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__) @@ -487,36 +491,37 @@ static mi_msecs_t timeval_secs(const struct timeval* tv) { return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); } -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { +static void mi_stat_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); -#if !defined(__HAIKU__) -#if defined(__APPLE__) && defined(__MACH__) - *peak_rss = rusage.ru_maxrss; -#else - *peak_rss = rusage.ru_maxrss * 1024; -#endif + *utime = timeval_secs(&rusage.ru_utime); + *stime = timeval_secs(&rusage.ru_stime); *page_faults = rusage.ru_majflt; - *page_reclaim = rusage.ru_minflt; - *peak_commit = 0; -#else -// Haiku does not have (yet?) a way to -// get these stats per process + // estimate commit using our stats + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *current_rss = *current_commit; // estimate +#if defined(__HAIKU__) + // Haiku does not have (yet?) a way to + // get these stats per process thread_info tid; area_info mem; ssize_t c; - *peak_rss = 0; - *page_faults = 0; - *page_reclaim = 0; - *peak_commit = 0; get_thread_info(find_thread(0), &tid); - while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; + *peak_rss += mem.ram_size; } -#endif - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); +#elif defined(__APPLE__) && defined(__MACH__) + *peak_rss = rusage.ru_maxrss; // BSD reports in bytes + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + *current_rss = (size_t)info.resident_size; + } +#else + *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB +#endif } #else @@ -525,12 +530,35 @@ static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_r #pragma message("define a way to get process info") #endif -static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { - *peak_rss = 0; +static void mi_stat_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) +{ + *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + *peak_rss = *peak_commit; + *current_rss = *current_commit; *page_faults = 0; - *page_reclaim = 0; - *peak_commit = 0; *utime = 0; *stime = 0; } #endif + + +mi_decl_export void mi_process_info(double* user_time, double* system_time, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept +{ + mi_msecs_t utime = 0; + mi_msecs_t stime = 0; + size_t current_rss0 = 0; + size_t peak_rss0 = 0; + size_t current_commit0 = 0; + size_t peak_commit0 = 0; + size_t page_faults0 = 0; + mi_stat_process_info(&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); + if (user_time!=NULL) *user_time = ((double)(utime/1000)) + (((double)(utime%1000))*1e-3); + if (system_time!=NULL) *system_time = ((double)(stime/1000)) + (((double)(stime%1000))*1e-3); + if (current_rss!=NULL) *current_rss = current_rss0; + if (peak_rss!=NULL) *peak_rss = peak_rss0; + if (current_commit!=NULL) *current_commit = current_commit0; + if (peak_commit!=NULL) *peak_commit = peak_commit0; + if (page_faults!=NULL) *page_faults = page_faults0; +} + diff --git a/test/main-override-static.c b/test/main-override-static.c index 0067be04..4bc1300b 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -180,7 +180,7 @@ static void corrupt_free(); static void block_overflow1(); static void invalid_free(); static void test_aslr(void); - +static void test_process_info(void); int main() { @@ -214,6 +214,7 @@ int main() { //mi_free(p2); mi_collect(true); mi_stats_print(NULL); + test_process_info(); return 0; } @@ -300,3 +301,15 @@ static void test_aslr(void) { p[1] = malloc(1134626); printf("p1: %p, p2: %p\n", p[0], p[1]); } + +static void test_process_info(void) { + double utime = 0; + double stime = 0; + size_t current_rss = 0; + size_t peak_rss = 0; + size_t current_commit = 0; + size_t peak_commit = 0; + size_t page_faults = 0; + mi_process_info(&utime, &stime, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); + printf("process info: user: %.3f s, rss: %zd b, commit: %zd b\n\n", utime, peak_rss, peak_commit); +} \ No newline at end of file