Merge branch 'dev' of https://github.com/microsoft/mimalloc into dev

2021-12-15 18:52:57 -08:00 · 2021-12-15 18:52:57 -08:00 · b8d69eb2a9
commit b8d69eb2a9
parent ee3ae2a425 8612d1228a
9 changed files with 107 additions and 45 deletions
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@ -297,7 +297,7 @@ We try to circumvent this in an efficient way:
 - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
           loader itself calls `malloc` even before the modules are initialized.
 - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
- DragonFly: the uniqueid use is buggy but kept for reference.
+- DragonFly: the defaults work but seem slow compared to FreeBSD (see PR #323)
 ------------------------------------------------------------------------------------------- */

 extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
@ -314,9 +314,12 @@ mi_heap_t*  _mi_heap_main_get(void);    // statically allocated main backing hea
 // use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
 // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
 #define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)  
-#elif defined(__DragonFly__)
-#warning "mimalloc is not working correctly on DragonFly yet."
+// #elif defined(__DragonFly__)
+// #warning "mimalloc is not working correctly on DragonFly yet."
 // #define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
+#elif defined(__ANDROID__)
+// See issue #381
+#define MI_TLS_PTHREAD
 #endif
 #endif

@ -766,8 +769,8 @@ static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
 }

 static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
-#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__))
-  // on Android, slot 1 is the thread ID (pointer to pthread internal struct)
+#if defined(__arm__) || (defined(__ANDROID__) && defined(__aarch64__))
  // issue #384, #495: on arm32 (any OS) and on arm64 Android, slot 1 is the thread ID (pointer to pthread internal struct)
  return (uintptr_t)mi_tls_slot(1);
 #else
  // in all our other targets, slot 0 is the pointer to the thread control block
--- a/include/mimalloc-override.h
+++ b/include/mimalloc-override.h
@ -48,6 +48,7 @@ not accidentally mix pointers from different allocators).
 #define valloc(n)               mi_valloc(n)
 #define pvalloc(n)              mi_pvalloc(n)
 #define reallocarray(p,s,n)     mi_reallocarray(p,s,n)
+#define reallocarr(p,s,n)       mi_reallocarr(p,s,n)
 #define memalign(a,n)           mi_memalign(a,n)
 #define aligned_alloc(a,n)      mi_aligned_alloc(a,n)
 #define posix_memalign(p,a,n)   mi_posix_memalign(p,a,n)
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@ -356,6 +356,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size)
 mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);

 mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
+mi_decl_nodiscard mi_decl_export int   mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
 mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept;
 mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept;

--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@ -231,7 +231,6 @@ extern "C" {
  size_t malloc_good_size(size_t size)     { return mi_malloc_good_size(size); }
  int    posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
  
-
  // `aligned_alloc` is only available when __USE_ISOC11 is defined.
  // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
  // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
@ -246,6 +245,7 @@ extern "C" {
 void  cfree(void* p)                                    { mi_free(p); } 
 void* pvalloc(size_t size)                              { return mi_pvalloc(size); }
 void* reallocarray(void* p, size_t count, size_t size)  { return mi_reallocarray(p, count, size); }
+int   reallocarr(void* p, size_t count, size_t size)    { return mi_reallocarr(p, count, size); }
 void* memalign(size_t alignment, size_t size)           { return mi_memalign(alignment, size); }
 void* _aligned_malloc(size_t alignment, size_t size)    { return mi_aligned_alloc(alignment, size); }

--- a/src/alloc-posix.c
+++ b/src/alloc-posix.c
@ -92,13 +92,23 @@ mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_n

 void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept {  // BSD
  void* newp = mi_reallocn(p,count,size);
-  if (newp==NULL) errno = ENOMEM;
+  if (newp==NULL) { errno = ENOMEM; }
  return newp;
 }

+int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD
+  // `p` is used as a `void**`: the address of the caller's pointer. That pointer is
+  // resized through `mi_reallocarray` and only overwritten when the resize succeeds.
+  // Returns 0 on success and an error code otherwise.
+  mi_assert(p != NULL);
+  void** const slot = (void**)p;
+  if (slot == NULL) { return EINVAL; }  // note: `errno` itself is not set on this path
+  void* const resized = mi_reallocarray(*slot, count, size);
+  if (mi_unlikely(resized == NULL)) { return errno; }  // `*slot` is left as it was
+  *slot = resized;
+  return 0;
+}
+
 void* mi__expand(void* p, size_t newsize) mi_attr_noexcept {  // Microsoft
  void* res = mi_expand(p, newsize);
-  if (res == NULL) errno = ENOMEM;
+  if (res == NULL) { errno = ENOMEM; }
  return res;
 }

--- a/src/os.c
+++ b/src/os.c
@ -375,42 +375,69 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
  return p;
 }

-#elif defined(MI_USE_SBRK)
+#elif defined(MI_USE_SBRK) || defined(__wasi__)
+
+#if defined(MI_USE_SBRK) 
 #define MI_SBRK_FAIL ((void*)(-1)) 
-static void* mi_sbrk_heap_grow(size_t size, size_t try_alignment) {
-  void* pbase0 = sbrk(0);
-  if (pbase0 == MI_SBRK_FAIL) {
+static void* mi_memory_grow( size_t size ) {
+  void* p = sbrk(size);
+  if (p == MI_SBRK_FAIL) {
    _mi_warning_message("unable to allocate sbrk() OS memory (%zu bytes)\n", size);
    errno = ENOMEM;
    return NULL;
  }
-  uintptr_t base = (uintptr_t)pbase0;
-  uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment);
-  size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size());
-  mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0);
-  if (alloc_size < size) return NULL;
-  void* pbase1 = sbrk(alloc_size);
-  if (pbase1 == MI_SBRK_FAIL) {
-    _mi_warning_message("unable to allocate sbrk() OS memory (%zu bytes, %zu requested)\n", size, alloc_size);
-    errno = ENOMEM;
-    return NULL;
+  return p;
 }
-  mi_assert(pbase0 == pbase1);
-  return (void*)aligned_base;
-}
-
 #elif defined(__wasi__)
- // currently unused as we use sbrk() on wasm
-static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) {
-  uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size();
-  uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment);
+static void* mi_memory_grow( size_t size ) {
+  // With `size == 0` this only queries the current memory size; otherwise memory is
+  // grown by enough whole OS pages to hold `size` bytes. In both cases the value
+  // returned by the builtin is scaled by the OS page size to form the result address.
+  size_t page_index;
+  if (size == 0) {
+    // query only: no warning is printed if this fails
+    page_index = __builtin_wasm_memory_size(0);
+    if (page_index == SIZE_MAX) {
+      errno = ENOMEM;
+      return NULL;
+    }
+  }
+  else {
+    page_index = __builtin_wasm_memory_grow( 0, _mi_divide_up(size, _mi_os_page_size()) );
+    if (page_index == SIZE_MAX) {
+      _mi_warning_message("unable to allocate wasm_memory_grow OS memory (%zu bytes)\n", size);
+      errno = ENOMEM;
+      return NULL;
+    }
+  }
+  return (void*)(page_index * _mi_os_page_size());
+}
+#endif
+
+// Grow the heap by at least `size` bytes and return a pointer into the new memory that
+// is aligned to `try_alignment` (the OS page size is used when `try_alignment` is 0).
+// Returns NULL when `mi_memory_grow` fails or when the padded size overflows.
+static void* mi_heap_grow(size_t size, size_t try_alignment) {
+  if (try_alignment == 0) { try_alignment = _mi_os_page_size(); }
+  void* pbase0 = mi_memory_grow(0);   // query the current base without growing
+  if (pbase0 == NULL) { return NULL; }
+  uintptr_t base = (uintptr_t)pbase0;
+  uintptr_t aligned_base = _mi_align_up(base, try_alignment);
  size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size());
  mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0);
  if (alloc_size < size) return NULL;
-  if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) {
-    _mi_warning_message("unable to allocate wasm_memory_grow() OS memory (%zu bytes, %zu requested)\n", size, alloc_size);
-    errno = ENOMEM;
-    return NULL;
+  void* pbase1 = mi_memory_grow(alloc_size);
+  if (pbase1 == NULL) { return NULL; }
+  if (pbase0 != pbase1) {
+    // another thread allocated in-between; now we may not be able to align correctly
+    base = (uintptr_t)pbase1;
+    aligned_base = _mi_align_up(base, try_alignment);
+    if (aligned_base + size > base + alloc_size) {
+      // we do not have enough space after alignment; since we cannot shrink safely,
+      // we waste the space :-( and allocate fresh with guaranteed enough overallocation
+      alloc_size = _mi_align_up( size + try_alignment, _mi_os_page_size() );
+      errno = 0;
+      void* pbase2 = mi_memory_grow( alloc_size );
+      if (pbase2 == NULL) { return NULL; }
+      // align inside the fresh region: `base` must refer to `pbase2` here, otherwise the
+      // result would still point into the (too small) `pbase1` region
+      base = (uintptr_t)pbase2;
+      aligned_base = _mi_align_up(base, try_alignment);
+      mi_assert_internal(aligned_base + size <= (uintptr_t)pbase2 + alloc_size);
+    }
+  }
+  else {
+    mi_assert_internal(aligned_base + size <= (uintptr_t)pbase1 + alloc_size);
  }
  return (void*)aligned_base;
 }
@ -637,14 +664,10 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo
    int flags = MEM_RESERVE;
    if (commit) flags |= MEM_COMMIT;
    p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
-  #elif defined(MI_USE_SBRK)
+  #elif defined(MI_USE_SBRK) || defined(__wasi__)
    MI_UNUSED(allow_large);
    *is_large = false;
-    p = mi_sbrk_heap_grow(size, try_alignment);
-  #elif defined(__wasi__)
-    MI_UNUSED(allow_large);
-    *is_large = false;
-    p = mi_wasm_heap_grow(size, try_alignment);
+    p = mi_heap_grow(size, try_alignment);
  #else
    int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
    p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large);
--- a/src/random.c
+++ b/src/random.c
@ -160,7 +160,8 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
 /* ----------------------------------------------------------------------------
 To initialize a fresh random context we rely on the OS:
 - Windows     : BCryptGenRandom (or RtlGenRandom)
- osX,bsd,wasi: arc4random_buf
+- macOS       : CCRandomGenerateBytes, arc4random_buf
+- bsd,wasi    : arc4random_buf
 - Linux       : getrandom,/dev/urandom
 If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
 -----------------------------------------------------------------------------*/
@ -191,7 +192,24 @@ static bool os_random_buf(void* buf, size_t buf_len) {
 }
 #endif

-#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
+#elif defined(__APPLE__)
+#include <AvailabilityMacros.h>
+#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10
+#include <CommonCrypto/CommonRandom.h>
+#endif
+static bool os_random_buf(void* buf, size_t buf_len) {
+  #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
+    // We prefer CCRandomGenerateBytes as it returns an error code while arc4random_buf
+    // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>      
+    return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
+  #else
+    // fall back on older macOS
+    arc4random_buf(buf, buf_len);
+    return true;
+  #endif
+}
+
+#elif defined(__ANDROID__) || defined(__DragonFly__) || \
      defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
      defined(__sun) // todo: what to use with __wasi__?
 #include <stdlib.h>
--- a/test/main-override.c
+++ b/test/main-override.c
@ -3,7 +3,7 @@
 #include <assert.h>
 #include <string.h>

-#include <mimalloc.h>
+#include <mimalloc-override.h>

 int main() {
  mi_version();       // ensure mimalloc library is linked
@ -25,6 +25,12 @@ int main() {
  //free(p1);
  //p2 = malloc(32);
  //mi_free(p2);
+  p1 = malloc(24);
+  p2 = reallocarray(p1, 16, 16);
+  free(p2);
+  p1 = malloc(24);
+  assert(reallocarr(&p1, 16, 16) == 0);
+  free(p1);
  mi_stats_print(NULL);
  return 0;
 }