Merge branch 'master' into dev

2020-09-24 16:32:56 -07:00 · 2020-09-24 16:32:56 -07:00 · ed8cc1fc19
commit ed8cc1fc19
parent 6f181194f6 13a4030619
3 changed files with 5 additions and 6 deletions
--- a/readme.md
+++ b/readme.md
@@ -11,7 +11,7 @@ mimalloc (pronounced "me-malloc")
 is a general purpose allocator with excellent [performance](#performance) characteristics.
 Initially developed by Daan Leijen for the run-time systems of the
 [Koka](https://github.com/koka-lang/koka) and [Lean](https://github.com/leanprover/lean) languages.
-Latest release:`v1.6.6` (2020-09-24).
+Latest release:`v1.6.7` (2020-09-24).

 It is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -73,8 +73,8 @@ Enjoy!

 ### Releases

-* 2020-09-24, `v1.6.6`: stable release 1.6: using standard C atomics, passing tsan testing, improved
-  handling of failing to commit on Windows, add `mi_process_info` api call.
+* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
+  handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
 * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
  support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support.
 * 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS,
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -448,8 +448,7 @@ void mi_free(void* p) mi_attr_noexcept
    #endif
    mi_block_set_next(page, block, page->local_free);
    page->local_free = block;
-    page->used--;
-    if (mi_unlikely(mi_page_all_free(page))) {
+    if (mi_unlikely(--page->used == 0)) {   // using this expression generates better code than: page->used--; if (mi_page_all_free(page))    
      _mi_page_retire(page);
    }
  }
--- a/src/region.c
+++ b/src/region.c
@@ -243,7 +243,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
 static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
 {
  // try all regions for a free slot  
-  const size_t count = mi_atomic_load_acquire(&regions_count);
+  const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed
  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
  for (size_t visited = 0; visited < count; visited++, idx++) {
    if (idx >= count) idx = 0;  // wrap around