merge from dev
This commit is contained in:
commit
36b7a3cb03
@ -180,21 +180,6 @@ bool _mi_page_is_valid(mi_page_t* page);
|
||||
#endif
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------------
|
||||
// On windows x86/x64 with msvc/clang-cl, use `rep movsb` for `memcpy` (issue #201)
|
||||
// -----------------------------------------------------------------------------------
|
||||
|
||||
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
// Copy `n` bytes from `s` to `d` through the `__movsb` intrinsic (`rep movsb`).
static inline void _mi_memcpy_rep_movsb(void* d, const void* s, size_t n) {
  unsigned char* const dst_bytes = (unsigned char*)d;
  const unsigned char* const src_bytes = (const unsigned char*)s;
  __movsb(dst_bytes, src_bytes, n);
}
// On Windows x86/x64 every `_mi_memcpy` goes through the `rep movsb` helper ...
#define _mi_memcpy(d,s,n)  _mi_memcpy_rep_movsb(d,s,n)
#else
// ... and everywhere else it is a plain `memcpy`.
#define _mi_memcpy(d,s,n)  memcpy(d,s,n)
#endif
|
||||
|
||||
|
||||
/* -----------------------------------------------------------
|
||||
Inlined definitions
|
||||
----------------------------------------------------------- */
|
||||
@ -997,4 +982,55 @@ static inline size_t mi_bsr(uintptr_t x) {
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------
|
||||
// Provide our own `_mi_memcpy` for potential performance optimizations.
|
||||
//
|
||||
// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
|
||||
// we happen to run on x86/x64 CPUs that have "fast short rep movsb" (FSRM) support
|
||||
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2019)). See also issue #201 and pr #253.
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
#include <string.h>
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
// Run-time flag: true when the CPU advertises "fast short rep movsb" (FSRM).
extern bool _mi_cpu_has_fsrm;

// Copy `n` bytes from `src` to `dst`.
// Uses `rep movsb` (via `__movsb`) when FSRM is available and the C library
// `memcpy` otherwise.
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  if (!_mi_cpu_has_fsrm) {
    memcpy(dst, src, n); // todo: use noinline?
    return;
  }
  __movsb((unsigned char*)dst, (const unsigned char*)src, n);
}
#else
// Copy `n` bytes from `src` to `dst`; on this platform simply the C library `memcpy`.
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  memcpy(dst, src, n);
}
#endif
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------
|
||||
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
|
||||
// This is used for example in `mi_realloc`.
|
||||
// -------------------------------------------------------------------------------
|
||||
|
||||
#if (__GNUC__ >= 4) || defined(__clang__)
|
||||
// On GCC/CLang we provide a hint that the pointers are word aligned.
|
||||
#include <string.h>
|
||||
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
|
||||
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
|
||||
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
|
||||
const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
|
||||
memcpy(adst, asrc, n);
|
||||
}
|
||||
#else
|
||||
// Default fallback on `_mi_memcpy`
|
||||
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
|
||||
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
|
||||
_mi_memcpy(dst, src, n);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc-internal.h"
|
||||
|
||||
#include <string.h> // memset, memcpy
|
||||
#include <string.h> // memset
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Aligned Allocation
|
||||
@ -137,7 +137,7 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne
|
||||
memset((uint8_t*)newp + start, 0, newsize - start);
|
||||
}
|
||||
}
|
||||
_mi_memcpy(newp, p, (newsize > size ? size : newsize));
|
||||
_mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
|
||||
mi_free(p); // only free if successful
|
||||
}
|
||||
return newp;
|
||||
|
@ -17,7 +17,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
// ------------------------------------------------------
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h> // memcpy
|
||||
#include <string.h> // memset
|
||||
#include <stdlib.h> // getenv
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#include "mimalloc-internal.h"
|
||||
#include "mimalloc-atomic.h"
|
||||
|
||||
#include <string.h> // memset, memcpy, strlen
|
||||
#include <string.h> // memset, strlen
|
||||
#include <stdlib.h> // malloc, exit
|
||||
|
||||
#define MI_IN_ALLOC_C
|
||||
@ -628,7 +628,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
|
||||
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
|
||||
memset((uint8_t*)newp + start, 0, newsize - start);
|
||||
}
|
||||
_mi_memcpy(newp, p, (newsize > size ? size : newsize));
|
||||
_mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
|
||||
mi_free(p); // only free if successful
|
||||
}
|
||||
return newp;
|
||||
|
@ -193,7 +193,7 @@ mi_heap_t* mi_heap_new(void) {
|
||||
mi_heap_t* bheap = mi_heap_get_backing();
|
||||
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
|
||||
if (heap==NULL) return NULL;
|
||||
_mi_memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
|
||||
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
|
||||
heap->tld = bheap->tld;
|
||||
heap->thread_id = _mi_thread_id();
|
||||
_mi_random_split(&bheap->random, &heap->random);
|
||||
@ -220,7 +220,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
|
||||
#ifdef MI_MEDIUM_DIRECT
|
||||
memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium));
|
||||
#endif
|
||||
_mi_memcpy(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
|
||||
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
|
||||
heap->thread_delayed_free = NULL;
|
||||
heap->page_count = 0;
|
||||
}
|
||||
|
25
src/init.c
25
src/init.c
@ -102,7 +102,7 @@ const mi_page_t _mi_page_empty = {
|
||||
// may lead to allocation itself on some platforms)
|
||||
// --------------------------------------------------------
|
||||
|
||||
const mi_heap_t _mi_heap_empty = {
|
||||
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
|
||||
NULL,
|
||||
MI_SMALL_PAGES_EMPTY,
|
||||
MI_PAGE_QUEUES_EMPTY,
|
||||
@ -120,7 +120,7 @@ const mi_heap_t _mi_heap_empty = {
|
||||
#define tld_empty_stats ((mi_stats_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,stats)))
|
||||
#define tld_empty_os ((mi_os_tld_t*)((uint8_t*)&tld_empty + offsetof(mi_tld_t,os)))
|
||||
|
||||
static const mi_tld_t tld_empty = {
|
||||
mi_decl_cache_align static const mi_tld_t tld_empty = {
|
||||
0,
|
||||
false,
|
||||
NULL, NULL,
|
||||
@ -213,8 +213,8 @@ static bool _mi_heap_init(void) {
|
||||
// OS allocated so already zero initialized
|
||||
mi_tld_t* tld = &td->tld;
|
||||
mi_heap_t* heap = &td->heap;
|
||||
_mi_memcpy(tld, &tld_empty, sizeof(*tld));
|
||||
_mi_memcpy(heap, &_mi_heap_empty, sizeof(*heap));
|
||||
_mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld));
|
||||
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap));
|
||||
heap->thread_id = _mi_thread_id();
|
||||
_mi_random_init(&heap->random);
|
||||
heap->cookie = _mi_heap_random_next(heap) | 1;
|
||||
@ -483,6 +483,22 @@ static void mi_process_load(void) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
// True when the CPU reports "fast short rep movsb" (FSRM); read by `_mi_memcpy`.
mi_decl_cache_align bool _mi_cpu_has_fsrm = false;

// Detect optional CPU features; called from `mi_process_init`.
// Currently this only probes FSRM (fast short `rep movsb`, AMD Zen3+ or Intel Ice Lake+).
static void mi_detect_cpu_features(void) {
  int32_t cpu_info[4];
  bool has_fsrm = false;
  // Leaf 0 returns the highest supported basic leaf in EAX. Leaf 7 must only be
  // interpreted when it is actually supported: for an out-of-range leaf the CPU
  // may return the data of another leaf, which would yield a bogus FSRM bit.
  __cpuid(cpu_info, 0);
  if (cpu_info[0] >= 7) {
    __cpuidex(cpu_info, 7, 0);  // extended features, sub-leaf 0
    has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
  }
  _mi_cpu_has_fsrm = has_fsrm;
}
#else
// No CPU feature detection is needed on other platforms.
static void mi_detect_cpu_features(void) {
  // nothing
}
#endif
|
||||
|
||||
// Initialize the process; called by thread_init or the process loader
|
||||
void mi_process_init(void) mi_attr_noexcept {
|
||||
// ensure we are called once
|
||||
@ -491,6 +507,7 @@ void mi_process_init(void) mi_attr_noexcept {
|
||||
mi_process_setup_auto_thread_done();
|
||||
|
||||
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
|
||||
mi_detect_cpu_features();
|
||||
_mi_os_init();
|
||||
mi_heap_main_init();
|
||||
#if (MI_DEBUG)
|
||||
|
Loading…
Reference in New Issue
Block a user