fix padding check for aligned allocation; improve perf for small aligned allocations
commit 4531367de2 (parent 7ff3ec2bf7)
include/mimalloc-types.h
@@ -54,16 +54,19 @@ terms of the MIT license. A copy of the license can be found in the file
 #define MI_ENCODE_FREELIST 1
 #endif
 
-// Reserve extra padding at the end of each block; must be a multiple of `sizeof(intptr_t)`!
+// Reserve extra padding at the end of each block; must be a multiple of `2*sizeof(intptr_t)`!
 // If free lists are encoded, the padding is checked if it was modified on free.
-#if (!defined(MI_PADDING))
-#if (MI_SECURE>=3 || MI_DEBUG>=1)
-#define MI_PADDING MI_MAX_ALIGN_SIZE
+#if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1))
+#define MI_PADDING
+#endif
+
+#if defined(MI_PADDING)
+#define MI_PADDING_SIZE (2*sizeof(intptr_t))
 #else
-#define MI_PADDING 0
-#endif
+#define MI_PADDING_SIZE 0
 #endif
 
+
 // ------------------------------------------------------
 // Platform specific values
 // ------------------------------------------------------
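As a side note, the effective padding can be checked with a small standalone program that mirrors the new macro logic. This is only an illustration; the MI_SECURE/MI_DEBUG values below are assumed for the demo and this is not mimalloc's actual header:

#include <stdio.h>
#include <stdint.h>

#define MI_SECURE 0
#define MI_DEBUG  1    // assumption: a debug build

#if (!defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1))
#define MI_PADDING
#endif

#if defined(MI_PADDING)
#define MI_PADDING_SIZE (2*sizeof(intptr_t))
#else
#define MI_PADDING_SIZE 0
#endif

int main(void) {
  // prints 16 on a typical 64-bit platform, 0 in a plain release build
  printf("padding per block: %zu bytes\n", (size_t)MI_PADDING_SIZE);
  return 0;
}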
src/alloc-aligned.c
@@ -18,20 +18,22 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
   // note: we don't require `size > offset`, we just guarantee that
   // the address at offset is aligned regardless of the allocated size.
   mi_assert(alignment > 0 && alignment % sizeof(void*) == 0);
 
   if (alignment <= MI_MAX_ALIGN_SIZE && offset==0) return _mi_heap_malloc_zero(heap, size, zero);
   if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
   if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
   const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
 
   // try if there is a small block available with just the right alignment
-  if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
-    mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
+  if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) {
+    mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
     const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
     if (mi_likely(page->free != NULL && is_aligned))
     {
       #if MI_STAT>1
       mi_heap_stat_increase( heap, malloc, size);
       #endif
-      void* p = _mi_page_malloc(heap,page,size); // TODO: inline _mi_page_malloc
+      void* p = _mi_page_malloc(heap,page,size + MI_PADDING_SIZE); // TODO: inline _mi_page_malloc
       mi_assert_internal(p != NULL);
       mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
       if (zero) _mi_block_zero_init(page,p,size);
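The change above makes the aligned fast path request `size + MI_PADDING_SIZE` from the small-page free list, so the padding canary written by `_mi_page_malloc` still fits inside the block. A minimal usage sketch that should exercise this path (assuming mimalloc is installed and linked):

#include <mimalloc.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc_aligned(48, 32);   // small size with 32-byte alignment
  printf("p=%p aligned=%d\n", p, (int)(((uintptr_t)p % 32) == 0));
  mi_free(p);
  return 0;
}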
@@ -47,16 +47,19 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept
   // Note: The spec dictates we should not modify `*p` on an error. (issue#27)
   // <http://man7.org/linux/man-pages/man3/posix_memalign.3.html>
   if (p == NULL) return EINVAL;
   if (alignment % sizeof(void*) != 0) return EINVAL;   // natural alignment
   if (!_mi_is_power_of_two(alignment)) return EINVAL;  // not a power of 2
-  void* q = mi_malloc_aligned(size, alignment);
+  void* q = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
   if (q==NULL && size != 0) return ENOMEM;
+  mi_assert_internal(((uintptr_t)q % alignment) == 0);
   *p = q;
   return 0;
 }
 
 void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {
-  return mi_malloc_aligned(size, alignment);
+  void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
+  mi_assert_internal(((uintptr_t)p % alignment) == 0);
+  return p;
 }
 
 void* mi_valloc(size_t size) mi_attr_noexcept {
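With this change, alignments of at most `MI_MAX_ALIGN_SIZE` (typically 16) are served by the plain `mi_malloc` fast path instead of the aligned-allocation path, which is the performance improvement mentioned in the commit title. A usage sketch calling the `mi_`-prefixed entry point directly (assuming mimalloc is linked):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = NULL;
  int rc = mi_posix_memalign(&p, 16, 100);   // 16 <= MI_MAX_ALIGN_SIZE: served by mi_malloc
  if (rc == 0) { printf("got %p\n", p); mi_free(p); }
  else         { printf("error %d\n", rc); }
  return 0;
}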
@@ -73,7 +76,9 @@ void* mi_pvalloc(size_t size) mi_attr_noexcept {
 void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {
   if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL;
   if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc>
-  return mi_malloc_aligned(size, alignment);
+  void* p = (alignment <= MI_MAX_ALIGN_SIZE ? mi_malloc(size) : mi_malloc_aligned(size, alignment));
+  mi_assert_internal(((uintptr_t)p % alignment) == 0);
+  return p;
 }
 
 void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD
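A small sketch of the C11 contract enforced above: `size` must be an integral multiple of `alignment`, otherwise NULL is returned (assuming mimalloc is linked):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* ok  = mi_aligned_alloc(64, 128);   // 128 is a multiple of 64
  void* bad = mi_aligned_alloc(64, 100);   // 100 is not a multiple of 64 -> NULL
  printf("ok=%p bad=%p\n", ok, bad);
  mi_free(ok);
  return 0;
}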
src/alloc.c
@@ -43,9 +43,9 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
     mi_heap_stat_increase(heap,normal[bin], 1);
   }
 #endif
-#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST)
-  mi_assert_internal((MI_PADDING % sizeof(mi_block_t*)) == 0);
-  mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING);
+#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
+  mi_assert_internal((MI_PADDING_SIZE % sizeof(mi_block_t*)) == 0);
+  mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE);
   mi_block_set_nextx(page, padding, block, page->key[0], page->key[1]);
 #endif
   return block;
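The lines above write the padding canary: an encoded copy of the block pointer stored in the last `MI_PADDING_SIZE` bytes of the block, re-checked on free. The standalone sketch below only illustrates the idea; the fixed `key`, the helper names, and the plain XOR encoding are assumptions for the demo and not mimalloc's actual encoding:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PADDING_SIZE (2*sizeof(intptr_t))
static const uintptr_t key = (uintptr_t)0xa5a5a5a5u;   // assumption: a fixed demo key

static void write_canary(void* block, size_t block_size) {
  uintptr_t enc = (uintptr_t)block ^ key;               // encode the block pointer
  memcpy((uint8_t*)block + block_size - PADDING_SIZE, &enc, sizeof(enc));
}

static int padding_intact(void* block, size_t block_size) {
  uintptr_t enc;
  memcpy(&enc, (uint8_t*)block + block_size - PADDING_SIZE, sizeof(enc));
  return (enc ^ key) == (uintptr_t)block;               // 1 if the canary still decodes to the block
}

int main(void) {
  size_t block_size = 32 + PADDING_SIZE;                // 32 user bytes plus padding
  uint8_t* block = malloc(block_size);
  write_canary(block, block_size);
  memset(block, 0xFF, 32 + sizeof(uintptr_t));          // simulate an overflow past the 32 user bytes
  printf("padding intact: %d\n", padding_intact(block, block_size));  // prints 0
  free(block);
  return 0;
}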
@@ -53,39 +53,27 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
 
 // allocate a small block
 extern inline mi_decl_allocator void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
-  mi_assert(size <= MI_SMALL_SIZE_MAX);
-  mi_page_t* page = _mi_heap_get_free_small_page(heap,size);
-  return _mi_page_malloc(heap, page, size);
+  mi_assert(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE));
+  mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
+  void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
+  mi_assert_internal(p==NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE));
+  return p;
 }
 
 extern inline mi_decl_allocator void* mi_malloc_small(size_t size) mi_attr_noexcept {
-#if (MI_PADDING>0)
-  size += MI_PADDING;
-#endif
   return mi_heap_malloc_small(mi_get_default_heap(), size);
 }
 
-
-// zero initialized small block
-mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
-  void* p = mi_malloc_small(size);
-  if (p != NULL) { memset(p, 0, size); }
-  return p;
-}
-
 // The main allocation function
 extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
   mi_assert(heap!=NULL);
   mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
-#if (MI_PADDING>0)
-  size += MI_PADDING;
-#endif
   void* p;
-  if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
+  if (mi_likely(size <= (MI_SMALL_SIZE_MAX - MI_PADDING_SIZE))) {
     p = mi_heap_malloc_small(heap, size);
   }
   else {
-    p = _mi_malloc_generic(heap, size);
+    p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE);
   }
 #if MI_STAT>1
   if (p != NULL) {
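Callers of `mi_malloc_small` no longer need to account for padding themselves; the limit is now `MI_SMALL_SIZE_MAX - MI_PADDING_SIZE` and the padded size is added internally. A usage sketch (assuming mimalloc is linked):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc_small(64);   // well below the small-size limit
  printf("small block at %p\n", p);
  mi_free(p);
  return 0;
}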
@@ -93,6 +81,7 @@ extern inline mi_decl_allocator void* mi_heap_malloc(mi_heap_t* heap, size_t siz
     mi_heap_stat_increase( heap, malloc, mi_good_size(size) ); // overestimate for aligned sizes
   }
 #endif
+  mi_assert_internal(p == NULL || mi_page_block_size(_mi_ptr_page(p)) >= (size + MI_PADDING_SIZE));
   return p;
 }
 
@@ -100,24 +89,34 @@ extern inline mi_decl_allocator void* mi_malloc(size_t size) mi_attr_noexcept {
   return mi_heap_malloc(mi_get_default_heap(), size);
 }
 
 
 void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
   // note: we need to initialize the whole block to zero, not just size
   // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
   UNUSED_RELEASE(size);
   mi_assert_internal(p != NULL);
-  mi_assert_internal(mi_page_block_size(page) >= size); // size can be zero
+  mi_assert_internal(mi_page_block_size(page) >= (size + MI_PADDING_SIZE)); // size can be zero
   mi_assert_internal(_mi_ptr_page(p)==page);
   if (page->is_zero) {
     // already zero initialized memory?
     ((mi_block_t*)p)->next = 0; // clear the free list pointer
-    mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING));
+    mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page) - MI_PADDING_SIZE));
   }
   else {
     // otherwise memset
-    memset(p, 0, mi_page_block_size(page) - MI_PADDING);
+    memset(p, 0, mi_page_block_size(page) - MI_PADDING_SIZE);
   }
 }
 
+// zero initialized small block
+mi_decl_allocator void* mi_zalloc_small(size_t size) mi_attr_noexcept {
+  void* p = mi_malloc_small(size);
+  if (p != NULL) {
+    _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again?
+  }
+  return p;
+}
+
 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
   void* p = mi_heap_malloc(heap,size);
   if (zero && p != NULL) {
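`mi_zalloc_small` now zeroes the whole block via `_mi_block_zero_init` rather than only `size` bytes, which is what lets the recalloc/rezalloc functions expand in place and still return zeroed memory (issue #63 mentioned above). A usage sketch (assuming mimalloc is linked):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  char* p = (char*)mi_zalloc_small(24);
  printf("first byte: %d\n", p != NULL ? p[0] : -1);   // prints 0
  mi_free(p);
  return 0;
}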
@@ -182,9 +181,9 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
 }
 #endif
 
-#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST)
+#if defined(MI_PADDING) && defined(MI_ENCODE_FREELIST)
 static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
-  mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING);
+  mi_block_t* const padding = (mi_block_t*)((uint8_t*)block + page->xblock_size - MI_PADDING_SIZE);
   mi_block_t* const decoded = mi_block_nextx(page, padding, page->key[0], page->key[1]);
   if (decoded != block) {
     _mi_error_message(EFAULT, "buffer overflow in heap block %p: write after %zu bytes\n", block, page->xblock_size);
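In a build where `MI_PADDING` and `MI_ENCODE_FREELIST` are defined (for example `MI_DEBUG>=1`), a write past the requested size should be reported by this check when the block is freed. A deliberate-overflow sketch, assuming such a debug build of mimalloc is linked:

#include <mimalloc.h>
#include <string.h>

int main(void) {
  char* p = (char*)mi_malloc(16);
  memset(p, 'x', 16 + 8);   // intentionally write past the requested 16 bytes into the padding
  mi_free(p);               // expected to report "buffer overflow in heap block ..." in debug builds
  return 0;
}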
@@ -285,7 +284,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
 static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
 {
 #if (MI_DEBUG)
-  memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING);
+  memset(block, MI_DEBUG_FREED, mi_page_block_size(page) - MI_PADDING_SIZE);
 #endif
 
   // and push it on the free list
@@ -411,10 +410,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
   if (p==NULL) return 0;
   const mi_segment_t* segment = _mi_ptr_segment(p);
   const mi_page_t* page = _mi_segment_page_of(segment, p);
-  size_t size = mi_page_block_size(page);
-#if defined(MI_PADDING)
-  size -= MI_PADDING;
-#endif
+  size_t size = mi_page_block_size(page) - MI_PADDING_SIZE;
   if (mi_unlikely(mi_page_has_aligned(page))) {
     ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
     mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
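`mi_usable_size` now always excludes the padding, so it reports at least the requested size but never the canary bytes. A usage sketch (assuming mimalloc is linked):

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc(100);
  printf("requested 100, usable %zu\n", mi_usable_size(p));  // >= 100, padding excluded
  mi_free(p);
  return 0;
}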
src/options.c
@@ -67,10 +67,10 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0, UNINIT, MI_OPTION(large_os_pages) },          // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
   { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
   { 0, UNINIT, MI_OPTION(segment_cache) },           // cache N segments per thread
-  { 1, UNINIT, MI_OPTION(page_reset) },              // reset page memory on free
+  { 0, UNINIT, MI_OPTION(page_reset) },              // reset page memory on free
   { 0, UNINIT, MI_OPTION(abandoned_page_reset) },    // reset free page memory when a thread terminates
   { 0, UNINIT, MI_OPTION(segment_reset) },           // reset segment memory on free (needs eager commit)
-  { 1, UNINIT, MI_OPTION(eager_commit_delay) },      // the first N segments per thread are not eagerly committed
+  { 0, UNINIT, MI_OPTION(eager_commit_delay) },      // the first N segments per thread are not eagerly committed
   { 100, UNINIT, MI_OPTION(reset_delay) },           // reset delay in milli-seconds
   { 0, UNINIT, MI_OPTION(use_numa_nodes) },          // 0 = use available numa nodes, otherwise use at most N nodes.
   { 100, UNINIT, MI_OPTION(os_tag) },                // only apple specific for now but might serve more or less related purpose
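The new defaults (page reset off, no eager-commit delay) can still be overridden at run time, either through environment variables such as MIMALLOC_PAGE_RESET=1 or programmatically; a sketch assuming mimalloc is linked:

#include <mimalloc.h>

int main(void) {
  mi_option_enable(mi_option_page_reset);           // turn page reset back on
  mi_option_set(mi_option_eager_commit_delay, 1);   // restore a delay of 1 segment
  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}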