add zero parameter to primitive allocation to improve codegen for calloc etc
parent 9d69e3ed06
commit d69d4c861f
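The change threads a `zero` flag through the internal allocation primitives so that zero-initialized allocations (`mi_zalloc_small` and the other zeroing wrappers, and thus `calloc`-style calls) can be cleared on the fast path instead of in a separate pass after allocation. For context, a minimal caller-side sketch; nothing below is new API, and the claim that `mi_calloc` benefits is an assumption based on it routing through the zeroing wrappers touched in the hunks below:

// Caller-side view only; external API is unchanged. Assumes mimalloc is linked.
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Zero-initialized small block: after this commit the zeroing is folded
  // into _mi_page_malloc's free-list pop instead of a separate clearing pass.
  int* a = (int*)mi_zalloc_small(16 * sizeof(int));
  // calloc-style allocation; the commit title names this as the main beneficiary.
  int* b = (int*)mi_calloc(100, sizeof(int));
  if (a == NULL || b == NULL) return 1;
  printf("%d %d\n", a[0], b[99]);   // both print 0
  mi_free(a);
  mi_free(b);
  return 0;
}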
@@ -106,7 +106,7 @@ void _mi_abandoned_await_readers(void);

// "page.c"
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc;

void  _mi_page_retire(mi_page_t* page) mi_attr_noexcept;   // free the page if there are no other pages with many free blocks
void  _mi_page_unfull(mi_page_t* page);
@@ -138,12 +138,11 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);

// "alloc.c"
-void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept;  // called from `_mi_malloc_generic`
+void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept;  // called from `_mi_malloc_generic`
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
-void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);

#if MI_DEBUG>1
bool _mi_page_is_valid(mi_page_t* page);
@@ -267,8 +266,8 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)
  *total = count * size;
-  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
-          && size > 0 && (SIZE_MAX / size) < count);
+  // note: gcc/clang optimize this to directly check the overflow flag
+  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
}
#endif

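The added comment says gcc/clang compile this portable check down to a direct test of the multiply's overflow flag. For comparison only (not part of this diff), the same predicate written with the gcc/clang builtin, which makes that codegen explicit:

// Illustration only: __builtin_mul_overflow is a type-generic gcc/clang builtin
// that returns true on overflow and stores the (wrapped) product in *total.
#include <stdbool.h>
#include <stddef.h>

static inline bool mul_overflow_builtin(size_t count, size_t size, size_t* total) {
  return __builtin_mul_overflow(count, size, total);
}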
@@ -279,7 +278,7 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
    return false;
  }
  else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
-    #if !defined(NDEBUG)
+    #if MI_DEBUG > 0
    _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
    #endif
    *total = SIZE_MAX;
@@ -925,7 +924,15 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
    __movsb((unsigned char*)dst, (const unsigned char*)src, n);
  }
  else {
-    memcpy(dst, src, n); // todo: use noinline?
+    memcpy(dst, src, n);
  }
}
+static inline void _mi_memzero(void* dst, size_t n) {
+  if (_mi_cpu_has_fsrm) {
+    __stosb((unsigned char*)dst, 0, n);
+  }
+  else {
+    memset(dst, 0, n);
+  }
+}
#else
@@ -933,6 +940,9 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  memcpy(dst, src, n);
}
+static inline void _mi_memzero(void* dst, size_t n) {
+  memset(dst, 0, n);
+}
#endif

@@ -950,12 +960,23 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
  const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
  _mi_memcpy(adst, asrc, n);
}

+static inline void _mi_memzero_aligned(void* dst, size_t n) {
+  mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
+  void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
+  _mi_memzero(adst, n);
+}
#else
// Default fallback on `_mi_memcpy`
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
  mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
  _mi_memcpy(dst, src, n);
}

+static inline void _mi_memzero_aligned(void* dst, size_t n) {
+  mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
+  _mi_memzero(dst, n);
+}
#endif

@@ -79,10 +79,9 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
    #if MI_STAT>1
    mi_heap_stat_increase(heap, malloc, size);
    #endif
-    void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc
+    void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
    mi_assert_internal(p != NULL);
    mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
-    if (zero) { _mi_block_zero_init(page, p, size); }
    return p;
  }
}
src/alloc.c  (85 lines changed)
@@ -25,11 +25,11 @@ terms of the MIT license. A copy of the license can be found in the file

// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
+extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept {
  mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
  mi_block_t* const block = page->free;
  if (mi_unlikely(block == NULL)) {
-    return _mi_malloc_generic(heap, size);
+    return _mi_malloc_generic(heap, size, zero);
  }
  mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
  // pop from the free list
@@ -37,10 +37,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
  page->free = mi_block_next(page, block);
  mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);

+  // zero the block?
+  if (mi_unlikely(zero)) {
+    mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks
+    const size_t zsize = (mi_unlikely(page->is_zero) ? sizeof(block->next) : page->xblock_size);
+    _mi_memzero_aligned(block, zsize);
+  }
+
  #if (MI_DEBUG>0)
-  if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
+  if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, size); }
  #elif (MI_SECURE!=0)
-  block->next = 0; // don't leak internal data
+  if (!zero) { block->next = 0; } // don't leak internal data
  #endif

  #if (MI_STAT>0)
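This hunk is where the codegen win comes from: when the flag is set, the block is cleared right after the free-list pop, and if the page is already known to be zero (`page->is_zero`) only the word that held the free-list link needs clearing. A stripped-down, self-contained sketch of that logic, using hypothetical stand-in types rather than the real `mi_page_t`/`mi_block_t`:

// Sketch only: blk_t stands in for mi_block_t; the real code also handles
// padding, debug fill, and statistics which are omitted here.
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

typedef struct blk_s { struct blk_s* next; } blk_t;

static void* pop_and_zero(blk_t** free_list, size_t block_size, bool page_is_zero, bool zero) {
  blk_t* block = *free_list;
  if (block == NULL) return NULL;      // the real code falls back to the generic path here
  *free_list = block->next;            // pop from the free list
  if (zero) {
    // If the page memory is already zero, only the free-list pointer we just
    // read can be non-zero; otherwise clear the full block.
    const size_t zsize = page_is_zero ? sizeof(block->next) : block_size;
    memset(block, 0, zsize);
  }
  return block;
}

int main(void) {
  blk_t blocks[2] = { { &blocks[1] }, { NULL } };
  blk_t* free_list = &blocks[0];
  void* p = pop_and_zero(&free_list, sizeof(blk_t), /*page_is_zero=*/false, /*zero=*/true);
  return (p != NULL && ((blk_t*)p)->next == NULL) ? 0 : 1;
}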
@@ -69,41 +76,45 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
  return block;
}

-// allocate a small block
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
  mi_assert(heap != NULL);
  mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
  mi_assert(size <= MI_SMALL_SIZE_MAX);
  #if (MI_PADDING)
  if (size == 0) {
    size = sizeof(void*);
  }
  #endif
  mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
-  void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
+  void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
  mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
  #if MI_STAT>1
  if (p != NULL) {
    if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
    mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
  }
  #endif
  return p;
}

+// allocate a small block
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+  return mi_heap_malloc_small_zero(heap, size, false);
+}
+
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept {
  return mi_heap_malloc_small(mi_get_default_heap(), size);
}

// The main allocation function
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+mi_decl_nodiscard extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
  if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
-    return mi_heap_malloc_small(heap, size);
+    return mi_heap_malloc_small_zero(heap, size, zero);
  }
  else {
    mi_assert(heap!=NULL);
    mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
-    void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic
+    void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic
    mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
    #if MI_STAT>1
    if (p != NULL) {
@@ -115,44 +126,17 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t*
  }
}

+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+  return _mi_heap_malloc_zero(heap, size, false);
+}
+
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
  return mi_heap_malloc(mi_get_default_heap(), size);
}


-void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
-  // note: we need to initialize the whole usable block size to zero, not just the requested size,
-  // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
-  MI_UNUSED(size);
-  mi_assert_internal(p != NULL);
-  mi_assert_internal(mi_usable_size(p) >= size); // size can be zero
-  mi_assert_internal(_mi_ptr_page(p)==page);
-  if (page->is_zero && size > sizeof(mi_block_t)) {
-    // already zero initialized memory
-    ((mi_block_t*)p)->next = 0; // clear the free list pointer
-    mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p)));
-  }
-  else {
-    // otherwise memset
-    memset(p, 0, mi_usable_size(p));
-  }
-}
-
// zero initialized small block
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
-  void* p = mi_malloc_small(size);
-  if (p != NULL) {
-    _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again?
-  }
-  return p;
-}
-
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
-  void* p = mi_heap_malloc(heap,size);
-  if (zero && p != NULL) {
-    _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again?
-  }
-  return p;
+  return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true);
}

mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
@@ -564,6 +548,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
#ifdef __cplusplus
void* _mi_externs[] = {
  (void*)&_mi_page_malloc,
+  (void*)&_mi_heap_malloc_zero,
  (void*)&mi_malloc,
  (void*)&mi_malloc_small,
  (void*)&mi_zalloc_small,
src/page.c  (15 lines changed)
@@ -815,7 +815,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {

// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
{
  mi_assert_internal(heap != NULL);

@@ -849,6 +849,15 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
  mi_assert_internal(mi_page_immediate_available(page));
  mi_assert_internal(mi_page_block_size(page) >= size);

-  // and try again, this time succeeding! (i.e. this should never recurse)
-  return _mi_page_malloc(heap, page, size);
+  // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
+  if (mi_unlikely(zero && page->xblock_size == 0)) {
+    // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
+    void* p = _mi_page_malloc(heap, page, size, false);
+    mi_assert_internal(p != NULL);
+    _mi_memzero_aligned(p, mi_page_usable_block_size(page));
+    return p;
+  }
+  else {
+    return _mi_page_malloc(heap, page, size, zero);
+  }
}
@@ -256,7 +256,7 @@ int main(int argc, char** argv) {
  mi_collect(true);
#endif
  mi_stats_print(NULL);
#endif
#endif
  //bench_end_program();
  return 0;
}