add zero parameter to primitive allocation to improve codegen for calloc etc

This commit is contained in:
Daan Leijen 2022-04-19 18:32:35 -07:00
parent 9d69e3ed06
commit d69d4c861f
5 changed files with 77 additions and 63 deletions

View File

@ -106,7 +106,7 @@ void _mi_abandoned_await_readers(void);
// "page.c"
void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc;
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
void _mi_page_unfull(mi_page_t* page);
@ -138,12 +138,11 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);
// "alloc.c"
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
#if MI_DEBUG>1
bool _mi_page_is_valid(mi_page_t* page);
@ -267,8 +266,8 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
*total = count * size;
return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
&& size > 0 && (SIZE_MAX / size) < count);
// note: gcc/clang optimize this to directly check the overflow flag
return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
}
#endif
@ -279,7 +278,7 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
return false;
}
else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
#if !defined(NDEBUG)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
#endif
*total = SIZE_MAX;
@ -925,7 +924,15 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
__movsb((unsigned char*)dst, (const unsigned char*)src, n);
}
else {
memcpy(dst, src, n); // todo: use noinline?
memcpy(dst, src, n);
}
}
static inline void _mi_memzero(void* dst, size_t n) {
if (_mi_cpu_has_fsrm) {
__stosb((unsigned char*)dst, 0, n);
}
else {
memset(dst, 0, n);
}
}
#else
@ -933,6 +940,9 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n);
}
static inline void _mi_memzero(void* dst, size_t n) {
memset(dst, 0, n);
}
#endif
@ -950,12 +960,23 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
_mi_memcpy(adst, asrc, n);
}
static inline void _mi_memzero_aligned(void* dst, size_t n) {
mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
_mi_memzero(adst, n);
}
#else
// Default fallback on `_mi_memcpy`
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
_mi_memcpy(dst, src, n);
}
static inline void _mi_memzero_aligned(void* dst, size_t n) {
mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
_mi_memzero(dst, n);
}
#endif

View File

@ -79,10 +79,9 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc, size);
#endif
void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc
void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
if (zero) { _mi_block_zero_init(page, p, size); }
return p;
}
}

View File

@ -25,11 +25,11 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept {
mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
mi_block_t* const block = page->free;
if (mi_unlikely(block == NULL)) {
return _mi_malloc_generic(heap, size);
return _mi_malloc_generic(heap, size, zero);
}
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
// pop from the free list
@ -37,10 +37,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
page->free = mi_block_next(page, block);
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
// zero the block?
if (mi_unlikely(zero)) {
mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks
const size_t zsize = (mi_unlikely(page->is_zero) ? sizeof(block->next) : page->xblock_size);
_mi_memzero_aligned(block, zsize);
}
#if (MI_DEBUG>0)
if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, size); }
#elif (MI_SECURE!=0)
block->next = 0; // don't leak internal data
if (!zero) { block->next = 0; } // don't leak internal data
#endif
#if (MI_STAT>0)
@ -69,41 +76,45 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
return block;
}
// allocate a small block
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_assert(heap!=NULL);
static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
mi_assert(heap != NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
mi_assert(size <= MI_SMALL_SIZE_MAX);
#if (MI_PADDING)
#if (MI_PADDING)
if (size == 0) {
size = sizeof(void*);
}
#endif
mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
mi_assert_internal(p==NULL || mi_usable_size(p) >= size);
#if MI_STAT>1
#endif
mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
#if MI_STAT>1
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
}
#endif
#endif
return p;
}
// allocate a small block
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return mi_heap_malloc_small_zero(heap, size, false);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
// The main allocation function
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
mi_decl_nodiscard extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
return mi_heap_malloc_small(heap, size);
return mi_heap_malloc_small_zero(heap, size, zero);
}
else {
mi_assert(heap!=NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic
void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic
mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
#if MI_STAT>1
if (p != NULL) {
@ -115,44 +126,17 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t*
}
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return _mi_heap_malloc_zero(heap, size, false);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
return mi_heap_malloc(mi_get_default_heap(), size);
}
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
// note: we need to initialize the whole usable block size to zero, not just the requested size,
// or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
MI_UNUSED(size);
mi_assert_internal(p != NULL);
mi_assert_internal(mi_usable_size(p) >= size); // size can be zero
mi_assert_internal(_mi_ptr_page(p)==page);
if (page->is_zero && size > sizeof(mi_block_t)) {
// already zero initialized memory
((mi_block_t*)p)->next = 0; // clear the free list pointer
mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p)));
}
else {
// otherwise memset
memset(p, 0, mi_usable_size(p));
}
}
// zero initialized small block
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
if (p != NULL) {
_mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again?
}
return p;
}
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
void* p = mi_heap_malloc(heap,size);
if (zero && p != NULL) {
_mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again?
}
return p;
return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
@ -564,6 +548,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
#ifdef __cplusplus
void* _mi_externs[] = {
(void*)&_mi_page_malloc,
(void*)&_mi_heap_malloc_zero,
(void*)&mi_malloc,
(void*)&mi_malloc_small,
(void*)&mi_zalloc_small,

View File

@ -815,7 +815,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
{
mi_assert_internal(heap != NULL);
@ -849,6 +849,15 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_block_size(page) >= size);
// and try again, this time succeeding! (i.e. this should never recurse)
return _mi_page_malloc(heap, page, size);
// and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
if (mi_unlikely(zero && page->xblock_size == 0)) {
// note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
void* p = _mi_page_malloc(heap, page, size, false);
mi_assert_internal(p != NULL);
_mi_memzero_aligned(p, mi_page_usable_block_size(page));
return p;
}
else {
return _mi_page_malloc(heap, page, size, zero);
}
}