change arena allocator to atomic bitmap as well

2019-11-06 22:49:01 -08:00 · 2019-11-06 22:49:01 -08:00 · b09282bc0d
parent 00e19cad9a
commit b09282bc0d
5 changed files with 94 additions and 208 deletions
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@ -230,8 +230,8 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
 mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
 mi_decl_export bool mi_is_redirected() mi_attr_noexcept;

-mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept;
-mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept;
+mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept;
+mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;

 // deprecated
 mi_decl_export int  mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
--- a/src/arena.c
+++ b/src/arena.c
@ -7,15 +7,19 @@ terms of the MIT license. A copy of the license can be found in the file

 /* ----------------------------------------------------------------------------
 "Arenas" are fixed area's of OS memory from which we can allocate
-large blocks (>= MI_ARENA_BLOCK_SIZE, 16MiB). Currently only used to
+large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). Currently only used to
 allocate in one arena consisting of huge OS pages -- otherwise it 
 delegates to direct allocation from the OS.

 In the future, we can expose an API to manually add more arenas which
 is sometimes needed for embedded devices or shared memory for example.

-The arena allocation needs to be thread safe and we use a lock-free scan
-with on-demand coalescing.
+The arena allocation needs to be thread safe and we use an atomic
+bitmap to allocate. The current implementation of the bitmap can
+only do this within a field (`uintptr_t`) so we can allocate at most
+blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
+can lead to fragmentation but fortunately most objects will be regions
+of 256MiB in practice.
 -----------------------------------------------------------------------------*/
 #include "mimalloc.h"
 #include "mimalloc-internal.h"
@ -23,6 +27,8 @@ with on-demand coalescing.

 #include <string.h>  // memset

+#include "bitmap.inc.c"  // atomic bitmap
+
 // os.c
 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
 void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);
@ -37,8 +43,10 @@ int   _mi_os_numa_node_count(void);
 ----------------------------------------------------------- */

 #define MI_SEGMENT_ALIGN      MI_SEGMENT_SIZE
-#define MI_ARENA_BLOCK_SIZE  (4*MI_SEGMENT_ALIGN)  // 16MiB
-#define MI_MAX_ARENAS        (64)
+#define MI_ARENA_BLOCK_SIZE   (8*MI_SEGMENT_ALIGN)     // 32MiB
+#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE)  // 2GiB: an allocation must fit within a single bitmap field
+#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)  // 16MiB: smaller requests are not served from an arena
+#define MI_MAX_ARENAS         (64)                     // must stay below 255: the memid keeps `arena_index+1` in its low 8 bits and `mi_memid_create` asserts `arena_index < 0xFE`

 // Block info: bit 0 contains the `in_use` bit, the upper bits the
 // size in count of arena blocks.
@ -48,11 +56,13 @@ typedef uintptr_t mi_block_info_t;
 typedef struct mi_arena_s {
  uint8_t* start;                         // the start of the memory area
  size_t   block_count;                   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
+  size_t   field_count;                   // number of bitmap fields
  int      numa_node;                     // associated NUMA node
  bool     is_zero_init;                  // is the arena zero initialized?
  bool     is_large;                      // large OS page allocated
-  _Atomic(uintptr_t)       block_bottom;  // optimization to start the search for free blocks
-  _Atomic(mi_block_info_t) blocks[1];     // `block_count` block info's
+  volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
+  mi_bitmap_field_t* blocks_dirty;         // are the blocks potentially non-zero?
+  mi_bitmap_field_t  blocks_map[1];        // bitmap of in-use blocks 
 } mi_arena_t;


@ -69,180 +79,55 @@ static _Atomic(uintptr_t)   mi_arena_count; // = 0
 // Use `0` as a special id for direct OS allocated memory.
 #define MI_MEMID_OS   0

-static size_t mi_memid_create(size_t arena_index, size_t block_index) {
+static size_t mi_memid_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
  mi_assert_internal(arena_index < 0xFE);
-  return ((block_index << 8) | ((arena_index+1) & 0xFF));
+  return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
 }

-static void mi_memid_indices(size_t memid, size_t* arena_index, size_t* block_index) {
+static void mi_memid_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
  mi_assert_internal(memid != MI_MEMID_OS);
  *arena_index = (memid & 0xFF) - 1;
-  *block_index = (memid >> 8);
+  *bitmap_index = (memid >> 8);
 }

-/* -----------------------------------------------------------
-  Block info
----------------------------------------------------------- */

-static bool mi_block_is_in_use(mi_block_info_t info) {
-  return ((info&1) != 0);
+static size_t mi_arena_block_count_of_size(size_t size) {  // number of arena blocks needed to hold `size` bytes
+  const size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);  // size aligned up to a multiple of the arena block size
+  const size_t bcount = asize / MI_ARENA_BLOCK_SIZE;  // exact division: `asize` is a multiple of the block size
+  return bcount;  // NOTE(review): no overflow guard on the align-up; callers are expected to bound `size` -- confirm
 }

-static size_t mi_block_count(mi_block_info_t info) {
-  return (info>>1);
-}
-
-static mi_block_info_t mi_block_info_create(size_t bcount, bool in_use) {
-  return (((mi_block_info_t)bcount << 1) | (in_use ? 1 : 0));
-}
-
-
 /* -----------------------------------------------------------
  Thread safe allocation in an arena
 ----------------------------------------------------------- */
-
-static void* mi_arena_allocx(mi_arena_t* arena, size_t start_idx, size_t end_idx, size_t needed_bcount, bool* is_zero, size_t* block_index)
+static void* mi_arena_alloc(mi_arena_t* arena, size_t blocks, bool* is_zero, mi_bitmap_index_t* bitmap_idx) 
 {
-  // Scan linearly through all block info's
-  // Skipping used ranges, coalescing free ranges on demand.
-  mi_assert_internal(needed_bcount > 0);
-  mi_assert_internal(start_idx <= arena->block_count);
-  mi_assert_internal(end_idx <= arena->block_count);
-  _Atomic(mi_block_info_t)* block = &arena->blocks[start_idx];
-  _Atomic(mi_block_info_t)* end = &arena->blocks[end_idx];
-  while (block < end) {
-    mi_block_info_t binfo = mi_atomic_read_relaxed(block);
-    size_t bcount = mi_block_count(binfo);
-    if (mi_block_is_in_use(binfo)) {
-      // in-use, skip ahead
-      mi_assert_internal(bcount > 0);
-      block += bcount;
-    }
-    else {
-      // free blocks
-      if (bcount==0) {
-        // optimization:
-        // use 0 initialized blocks at the end, to use single atomic operation
-        // initially to reduce contention (as we don't need to split)
-        if (block + needed_bcount > end) {
-          return NULL; // does not fit
-        }
-        else if (!mi_atomic_cas_weak(block, mi_block_info_create(needed_bcount, true), binfo)) {
-          // ouch, someone else was quicker. Try again..
-          continue;
-        }
-        else {
-          // we got it: return a pointer to the claimed memory
-          ptrdiff_t idx = (block - arena->blocks);
-          *is_zero = arena->is_zero_init;
-          *block_index = idx;
-          return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
+  const size_t fcount = arena->field_count;  // number of bitmap fields to scan
+  size_t idx = mi_atomic_read(&arena->search_idx);  // start from the field where the last successful search ended
+  for (size_t visited = 0; visited < fcount; visited++, idx++) {  // visit every field at most once
+    if (idx >= fcount) idx = 0;  // wrap around to the first field
+    if (mi_bitmap_try_claim_field(arena->blocks_map, idx, blocks, bitmap_idx)) {  // tries to claim `blocks` bits inside field `idx` only
+      // claimed it! mark the blocks dirty; `*is_zero` is whatever `mi_bitmap_claim` reports (presumably "all bits were 0 before" -- confirm in bitmap.inc.c)
+      *is_zero = mi_bitmap_claim(arena->blocks_dirty, fcount, blocks, *bitmap_idx);
+      mi_atomic_write(&arena->search_idx, idx);  // search hint only: the next allocation starts scanning at this field
+      return (arena->start + (*bitmap_idx)*MI_ARENA_BLOCK_SIZE);  // NOTE(review): assumes the bitmap index equals the linear block index -- confirm
    }
  }
-
-      mi_assert_internal(bcount>0);
-      if (needed_bcount > bcount) {
-#if 0 // MI_NO_ARENA_COALESCE
-        block += bcount; // too small, skip to the next range
-        continue;
-#else
-        // too small, try to coalesce
-        _Atomic(mi_block_info_t)* block_next = block + bcount;
-        if (block_next >= end) {
-          return NULL; // does not fit
-        }
-        mi_block_info_t binfo_next = mi_atomic_read(block_next);
-        size_t bcount_next = mi_block_count(binfo_next);
-        if (mi_block_is_in_use(binfo_next)) {
-          // next block is in use, cannot coalesce
-          block += (bcount + bcount_next); // skip ahea over both blocks
-        }
-        else {
-          // next block is free, try to coalesce
-          // first set the next one to being used to prevent dangling ranges
-          if (!mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, true), binfo_next)) {
-            // someone else got in before us.. try again
-            continue;
-          }
-          else {
-            if (!mi_atomic_cas_strong(block, mi_block_info_create(bcount + bcount_next, true), binfo)) {  // use strong to increase success chance
-              // someone claimed/coalesced the block in the meantime
-              // first free the next block again..
-              bool ok = mi_atomic_cas_strong(block_next, mi_block_info_create(bcount_next, false), binfo_next); // must be strong
-              mi_assert(ok); UNUSED(ok);
-              // and try again
-              continue;
-            }
-            else {
-              // coalesced! try again
-              // todo: we could optimize here to immediately claim the block if the
-              // coalesced size is a fit instead of retrying. Keep it simple for now.
-              continue;
-            }
-          }
-        }
-#endif
-      }
-      else {  // needed_bcount <= bcount
-        mi_assert_internal(needed_bcount <= bcount);
-        // it fits, claim the whole block
-        if (!mi_atomic_cas_weak(block, mi_block_info_create(bcount, true), binfo)) {
-          // ouch, someone else was quicker. Try again..
-          continue;
-        }
-        else {
-          // got it, now split off the needed part
-          if (needed_bcount < bcount) {
-            mi_atomic_write(block + needed_bcount, mi_block_info_create(bcount - needed_bcount, false));
-            mi_atomic_write(block, mi_block_info_create(needed_bcount, true));
-          }
-          // return a pointer to the claimed memory
-          ptrdiff_t idx = (block - arena->blocks);
-          *is_zero = false;
-          *block_index = idx;
-          return (arena->start + (idx*MI_ARENA_BLOCK_SIZE));
-        }
-      }
-    }
-  }
-  // no success
  return NULL;
 }

-// Try to reduce search time by starting from bottom and wrap around.
-static void* mi_arena_alloc(mi_arena_t* arena, size_t needed_bcount, bool* is_zero, size_t* block_index)
-{
-  uintptr_t bottom = mi_atomic_read_relaxed(&arena->block_bottom);
-  void* p = mi_arena_allocx(arena, bottom, arena->block_count, needed_bcount, is_zero, block_index);
-  if (p == NULL && bottom > 0) {
-    // try again from the start
-    p = mi_arena_allocx(arena, 0, bottom, needed_bcount, is_zero, block_index);
-  }
-  if (p != NULL) {
-    mi_atomic_write(&arena->block_bottom, *block_index);
-  }
-  return p;
-}

 /* -----------------------------------------------------------
  Arena Allocation
 ----------------------------------------------------------- */

 static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, 
-                                    bool* commit, bool* large, bool* is_zero,
-                                    size_t* memid) 
+                                 bool* commit, bool* large, bool* is_zero, size_t* memid) 
 {
-  size_t block_index = SIZE_MAX;
-  void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &block_index);
+  mi_bitmap_index_t bitmap_index;
+  void* p = mi_arena_alloc(arena, needed_bcount, is_zero, &bitmap_index);
  if (p != NULL) {
-    mi_assert_internal(block_index != SIZE_MAX);
-    #if MI_DEBUG>=1
-    _Atomic(mi_block_info_t)* block = &arena->blocks[block_index];
-    mi_block_info_t binfo = mi_atomic_read(block);
-    mi_assert_internal(mi_block_is_in_use(binfo));
-    mi_assert_internal(mi_block_count(binfo) >= needed_bcount);
-    #endif
-    *memid = mi_memid_create(arena_index, block_index);
+    *memid = mi_memid_create(arena_index, bitmap_index);
    *commit = true;           // TODO: support commit on demand?
    *large = arena->is_large;
  }
@ -261,15 +146,13 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
  if (large==NULL) large = &default_large;  // ensure `large != NULL`

  // try to allocate in an arena if the alignment is small enough
-  // and if there is not too much waste around the `MI_ARENA_BLOCK_SIZE`.
+  // and the object is not too large or too small.
  if (alignment <= MI_SEGMENT_ALIGN && 
-      size >= 3*(MI_ARENA_BLOCK_SIZE/4) &&  // > 12MiB (not more than 25% waste)
-      !(size > MI_ARENA_BLOCK_SIZE && size < 3*(MI_ARENA_BLOCK_SIZE/2)) // ! <16MiB - 24MiB>
-     )
+      size <= MI_ARENA_MAX_OBJ_SIZE && 
+      size >= MI_ARENA_MIN_OBJ_SIZE)
  {
-    size_t asize = _mi_align_up(size, MI_ARENA_BLOCK_SIZE);
-    size_t bcount = asize / MI_ARENA_BLOCK_SIZE;
-    int numa_node = _mi_os_numa_node(tld); // current numa node
+    const size_t bcount = mi_arena_block_count_of_size(size);
+    const int numa_node = _mi_os_numa_node(tld); // current numa node

    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
    // try numa affine allocation
@ -324,8 +207,8 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
  else {
    // allocated in an arena
    size_t arena_idx;
-    size_t block_idx;
-    mi_memid_indices(memid, &arena_idx, &block_idx);
+    size_t bitmap_idx;
+    mi_memid_indices(memid, &arena_idx, &bitmap_idx);
    mi_assert_internal(arena_idx < MI_MAX_ARENAS);
    mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
    mi_assert_internal(arena != NULL);
@ -333,27 +216,17 @@ void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
      _mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
-    mi_assert_internal(arena->block_count > block_idx);
-    if (arena->block_count <= block_idx) {
-      _mi_fatal_error("trying to free from non-existent block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
+    mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
+    if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
+      _mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
-    _Atomic(mi_block_info_t)* block = &arena->blocks[block_idx];
-    mi_block_info_t binfo = mi_atomic_read_relaxed(block);
-    mi_assert_internal(mi_block_is_in_use(binfo));
-    mi_assert_internal(mi_block_count(binfo)*MI_ARENA_BLOCK_SIZE >= size);
-    if (!mi_block_is_in_use(binfo)) {
+    const size_t blocks = mi_arena_block_count_of_size(size);  // block count is recomputed from `size`, so `size` must match the size used at allocation
+    bool ones = mi_bitmap_unclaim(arena->blocks_map, arena->field_count, blocks, bitmap_idx);  // clears the in-use bits; the dirty bitmap is left untouched
+    if (!ones) {  // not all bits were set before clearing: (part of) the range was already free
      _mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
      return;
    };
-    bool ok = mi_atomic_cas_strong(block, mi_block_info_create(mi_block_count(binfo), false), binfo);
-    mi_assert_internal(ok);
-    if (!ok) {
-      _mi_warning_message("unable to free arena block: %p, info 0x%zx", p, binfo);
-    }
-    if (block_idx < mi_atomic_read_relaxed(&arena->block_bottom)) {
-      mi_atomic_write(&arena->block_bottom, block_idx);
-    }
  }
 }

@ -365,7 +238,6 @@ static bool mi_arena_add(mi_arena_t* arena) {
  mi_assert_internal(arena != NULL);
  mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
  mi_assert_internal(arena->block_count > 0);
-  mi_assert_internal(mi_mem_is_zero(arena->blocks,arena->block_count*sizeof(mi_block_info_t)));
  
  uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
  if (i >= MI_MAX_ARENAS) {
@ -383,40 +255,49 @@ static bool mi_arena_add(mi_arena_t* arena) {
 #include <errno.h> // ENOMEM

 // reserve at a specific numa node
-int mi_reserve_huge_os_pages_at(size_t pages, int numa_node) mi_attr_noexcept {
+int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
  if (pages==0) return 0;
  if (numa_node < -1) numa_node = -1;
  if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
  size_t hsize = 0;
  size_t pages_reserved = 0;
-  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, pages*500, &pages_reserved, &hsize);
+  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
  if (p==NULL || pages_reserved==0) {
    _mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
    return ENOMEM;
  }
  _mi_verbose_message("reserved %zu gb huge pages\n", pages_reserved);
  
-  size_t bcount = hsize / MI_ARENA_BLOCK_SIZE;
-  size_t asize = sizeof(mi_arena_t) + (bcount*sizeof(mi_block_info_t));  // one too much
+  size_t bcount = mi_arena_block_count_of_size(hsize);
+  size_t fields = (bcount + MI_BITMAP_FIELD_BITS - 1) / MI_BITMAP_FIELD_BITS;
+  size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));  
  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
  if (arena == NULL) {
    _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
    return ENOMEM;
  }
  arena->block_count = bcount;
+  arena->field_count = fields;
  arena->start = (uint8_t*)p;  
-  arena->block_bottom = 0;
  arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
  arena->is_large = true;
  arena->is_zero_init = true;
-  memset(arena->blocks, 0, bcount * sizeof(mi_block_info_t));
+  arena->search_idx = 0;  // begin searching at the first bitmap field
+  arena->blocks_dirty = &arena->blocks_map[fields];  // dirty bitmap follows the `fields` in-use fields; index by field count (not block count) to stay inside the `2*fields` fields allocated above
+  size_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;  // bits in the last field that have no backing arena block
+  if (post > 0) {
+    // don't use leftover bits at the end: mark them as permanently in use
+    mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);  // first leftover bit in the last field
+    mi_bitmap_claim(arena->blocks_map, fields, post, postidx);  // result ignored: nothing else can reach the arena before `mi_arena_add`
+  }
+  
  mi_arena_add(arena);
  return 0;
 }


 // reserve huge pages evenly among all numa nodes. 
-int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
+int mi_reserve_huge_os_pages_interleave(size_t pages, size_t timeout_msecs) mi_attr_noexcept {
  if (pages == 0) return 0;

  // pages per numa node
@ -424,12 +305,13 @@ int mi_reserve_huge_os_pages_interleave(size_t pages) mi_attr_noexcept {
  if (numa_count <= 0) numa_count = 1;
  const size_t pages_per = pages / numa_count;
  const size_t pages_mod = pages % numa_count;
+  const size_t timeout_per = (timeout_msecs == 0 ? 0 : (timeout_msecs / numa_count) + 50);  // per-node share plus 50ms slack; keep 0 as 0 because `_mi_os_alloc_huge_os_pages` only checks the timeout when `max_msecs > 0`
  
  // reserve evenly among numa nodes
  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
    size_t node_pages = pages_per;  // can be 0
    if ((size_t)numa_node < pages_mod) node_pages++;
-    int err = mi_reserve_huge_os_pages_at(node_pages, numa_node);
+    int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
    if (err) return err;
    if (pages < node_pages) {
      pages = 0;
@ -446,7 +328,7 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv
  UNUSED(max_secs);
  _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
  if (pages_reserved != NULL) *pages_reserved = 0;
-  int err = mi_reserve_huge_os_pages_interleave(pages);  
+  int err = mi_reserve_huge_os_pages_interleave(pages, (size_t)(max_secs * 1000.0));  
  if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
  return err;
 }
--- a/src/bitmap.inc.c
+++ b/src/bitmap.inc.c
@ -135,13 +135,15 @@ static inline bool mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields,
 }

 // Set `count` bits at `bitmap_idx` to 0 atomically
-static inline void mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
+// Returns `true` if all `count` bits were 1 previously
+static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  const size_t idx = mi_bitmap_index_field(bitmap_idx);
  const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
  const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
  mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
  mi_assert_internal((bitmap[idx] & mask) == mask);
-  mi_atomic_and(&bitmap[idx], ~mask);
+  uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask);  // clear the masked bits; `prev` is presumably the field value before the and -- confirm `mi_atomic_and`'s return
+  return ((prev & mask) == mask);  // true only when every bit in `mask` was set in `prev`
 }


--- a/src/init.c
+++ b/src/init.c
@ -434,7 +434,7 @@ static void mi_process_load(void) {

  if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
    size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);    
-    mi_reserve_huge_os_pages_interleave(pages);
+    mi_reserve_huge_os_pages_interleave(pages, pages*500);
  }
 }

--- a/src/os.c
+++ b/src/os.c
@ -940,6 +940,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
    _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
    
    // check for timeout
+    if (max_msecs > 0) {
      mi_msecs_t elapsed = _mi_clock_end(start_t);
      if (page >= 1) {
        mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
@ -952,6 +953,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
        break;
      }
    }
+  }
  mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
  if (pages_reserved != NULL) *pages_reserved = page;
  if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE;