merge from dev

2020-12-10 13:17:56 -08:00 · 2020-12-10 13:17:56 -08:00 · b803095b83
commit b803095b83
parent ad05829195 745cf1e2f5
8 changed files with 42 additions and 17 deletions
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@ -298,7 +298,7 @@ We try to circumvent this in an efficient way:
 - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
           loader itself calls `malloc` even before the modules are initialized.
 - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
- DragonFly: not yet working.
+- DragonFly: the uniqueid use is buggy but kept for reference.
 ------------------------------------------------------------------------------------------- */

 extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
@ -316,7 +316,7 @@ mi_heap_t*  _mi_heap_main_get(void);    // statically allocated main backing hea
 #define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)  
 #elif defined(__DragonFly__)
 #warning "mimalloc is not working correctly on DragonFly yet."
-#define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
+//#define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
 #endif
 #endif

@ -328,7 +328,7 @@ static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
  pthread_t self = pthread_self();
  #if defined(__DragonFly__)
  if (self==NULL) {
-    static mi_heap_t* pheap_main = _mi_heap_main_get();
+    static mi_heap_t* pheap_main; pheap_main = _mi_heap_main_get();  // static storage: the returned address must outlive this call (a plain local would dangle); assigned at run time since C forbids a non-constant static initializer
    return &pheap_main;
  }
  #endif
@ -822,6 +822,8 @@ static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
  __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // 32-bit always uses GS
 #elif defined(__MACH__) && defined(__x86_64__)
  __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOSX uses GS
+#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
+  __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
 #elif defined(__x86_64__)
  __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
 #elif defined(__arm__)
@ -843,6 +845,8 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
  __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
 #elif defined(__MACH__) && defined(__x86_64__)
  __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOSX uses GS
+#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
+  __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI; %0 is the memory destination, %1 the value to store
 #elif defined(__x86_64__)
-  __asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
+  __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS; store must target operand %0 (the slot), not %1 (the value)
 #elif defined(__arm__)
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@ -479,6 +479,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 #define mi_stat_counter_increase(stat,amount) (void)0
 #endif

+#define mi_heap_stat_counter_increase(heap,stat,amount)  mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_increase(heap,stat,amount)  mi_stat_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_decrease(heap,stat,amount)  mi_stat_decrease( (heap)->tld->stats.stat, amount)

--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@ -319,6 +319,7 @@ typedef enum mi_option_e {
  mi_option_limit_os_alloc,
  mi_option_os_tag,
  mi_option_max_errors,
+  mi_option_max_warnings,
  _mi_option_last
 } mi_option_t;

--- a/src/alloc.c
+++ b/src/alloc.c
@ -778,7 +778,12 @@ but we call `exit` instead (i.e. not returning).
 #ifdef __cplusplus
 #include <new>
 static bool mi_try_new_handler(bool nothrow) {
-  std::new_handler h = std::get_new_handler();
+  #if defined(_MSC_VER) || (__cplusplus >= 201103L)
+    std::new_handler h = std::get_new_handler();
+  #else
+    std::new_handler h = std::set_new_handler(NULL);  // pre-C++11 has no get_new_handler: swap in NULL to read the current handler...
+    std::set_new_handler(h);                          // ...and restore it right away (note: the swap/restore pair is not atomic)
+  #endif
  if (h==NULL) {
    if (!nothrow) throw std::bad_alloc();
    return false;
--- a/src/heap.c
+++ b/src/heap.c
@ -274,10 +274,10 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
  const size_t bsize = mi_page_block_size(page);
  if (bsize > MI_MEDIUM_OBJ_SIZE_MAX) {
    if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
-      _mi_stat_decrease(&heap->tld->stats.large,bsize);
+      mi_heap_stat_decrease(heap, large, bsize);
    }
    else {
-      _mi_stat_decrease(&heap->tld->stats.huge, bsize);
+      mi_heap_stat_decrease(heap, huge, bsize);
    }
  }
 #if (MI_STAT>1)
--- a/src/options.c
+++ b/src/options.c
@ -19,7 +19,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif


-static uintptr_t mi_max_error_count = 16;  // stop outputting errors after this
+static uintptr_t mi_max_error_count   = 16; // stop outputting errors after this
+static uintptr_t mi_max_warning_count = 16; // stop outputting warnings after this

 static void mi_add_stderr_output();

@ -93,7 +94,8 @@ static mi_option_desc_t options[_mi_option_last] =
  { 0,    UNINIT, MI_OPTION(use_numa_nodes) },    // 0 = use available numa nodes, otherwise use at most N nodes. 
  { 0,    UNINIT, MI_OPTION(limit_os_alloc) },    // 1 = do not use OS memory for allocation (but only reserved arenas)
  { 100,  UNINIT, MI_OPTION(os_tag) },            // only apple specific for now but might serve more or less related purpose
-  { 16,   UNINIT, MI_OPTION(max_errors) }         // maximum errors that are output
+  { 16,   UNINIT, MI_OPTION(max_errors) },        // maximum errors that are output
+  { 16,   UNINIT, MI_OPTION(max_warnings) }       // maximum warnings that are output
 };

 static void mi_option_init(mi_option_desc_t* desc);
@ -111,6 +113,7 @@ void _mi_options_init(void) {
    }
  }
  mi_max_error_count = mi_option_get(mi_option_max_errors);
+  mi_max_warning_count = mi_option_get(mi_option_max_warnings);
 }

 long mi_option_get(mi_option_t option) {
@ -251,7 +254,8 @@ static void mi_add_stderr_output() {
 // --------------------------------------------------------
 // Messages, all end up calling `_mi_fputs`.
 // --------------------------------------------------------
-static _Atomic(uintptr_t) error_count; // = 0;  // when MAX_ERROR_COUNT stop emitting errors and warnings
+static _Atomic(uintptr_t) error_count;   // = 0;  // when >= max_error_count stop emitting errors
+static _Atomic(uintptr_t) warning_count; // = 0;  // when >= max_warning_count stop emitting warnings

 // When overriding malloc, we may recurse into mi_vfprintf if an allocation
 // inside the C runtime causes another message.
@ -329,7 +333,7 @@ static void mi_show_error_message(const char* fmt, va_list args) {

 void _mi_warning_message(const char* fmt, ...) {
  if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return;
-  if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return;
+  if (mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return;
  va_list args;
  va_start(args,fmt);
  mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args);
--- a/src/os.c
+++ b/src/os.c
@ -425,7 +425,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
        #endif
        if (large_only) return p;
        if (p == NULL) {
-          mi_atomic_store_release(&large_page_try_ok, 10UL);  // on error, don't try again for the next N allocations
+          mi_atomic_store_release(&large_page_try_ok, (uintptr_t)10);  // on error, don't try again for the next N allocations
        }
      }
    }
@ -439,7 +439,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
    // though since properly aligned allocations will already use large pages if available
    // in that case -- in particular for our large regions (in `memory.c`).
    // However, some systems only allow THP if called with explicit `madvise`, so
-    // when large OS pages are enabled for mimalloc, we call `madvice` anyways.
+    // when large OS pages are enabled for mimalloc, we call `madvise` anyways.
    if (allow_large && use_large_os_page(size, try_alignment)) {
      if (madvise(p, size, MADV_HUGEPAGE) == 0) {
        *is_large = true; // possibly
@ -742,6 +742,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
    // decommit: just disable access
    err = mprotect(start, csize, PROT_NONE);
    if (err != 0) { err = errno; }
+      #if defined(MADV_FREE_REUSE)
+      while ((err = madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; }
+      #endif
    #endif
  }
  #endif
@ -801,10 +804,17 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
  if (p != start) return false;
 #else
 #if defined(MADV_FREE)
-  static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(MADV_FREE);
-  int err = madvise(start, csize, (int)mi_atomic_load_relaxed(&advice));
-  if (err != 0 && errno == EINVAL && advice == MADV_FREE) {
-    // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
+  #if defined(MADV_FREE_REUSABLE)
+    #define KK_MADV_FREE_INITIAL  MADV_FREE_REUSABLE
+  #else
+    #define KK_MADV_FREE_INITIAL  MADV_FREE
+  #endif
+  static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(KK_MADV_FREE_INITIAL);
+  int oadvice = (int)mi_atomic_load_relaxed(&advice);
+  int err;
+  while ((err = madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0;  };
+  if (err != 0 && errno == EINVAL && oadvice == KK_MADV_FREE_INITIAL) {  
+    // if MADV_FREE/MADV_FREE_REUSABLE is not supported, fall back to MADV_DONTNEED from now on
    mi_atomic_store_release(&advice, (uintptr_t)MADV_DONTNEED);
    err = madvise(start, csize, MADV_DONTNEED);
  }
--- a/test/test-stress.c
+++ b/test/test-stress.c
@ -310,7 +310,7 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
  pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
  memset(threads, 0, sizeof(pthread_t) * nthreads);
  //pthread_setconcurrency(nthreads);
-  for (uintptr_t i = 0; i < nthreads; i++) {
+  for (size_t i = 0; i < nthreads; i++) {
    pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
  }
  for (size_t i = 0; i < nthreads; i++) {