Merge pull request #253 from haneefmubarak/memcpy-rep-movsb-windows-201

resolve #201 with a platform-selective REP MOVSB implementation
This commit is contained in:
Daan 2021-01-29 16:00:00 -08:00 committed by GitHub
commit 9b966c3492
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 29 additions and 10 deletions

View File

@ -172,6 +172,25 @@ bool _mi_page_is_valid(mi_page_t* page);
#define EOVERFLOW (75)
#endif
// ------------------------------------------------------
// A fast `memcpy()` is not available on x86(_64) Windows
// platforms, so use REP MOVSB there instead
// ------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)
#include <intrin.h>
// Copy `n` bytes from `s` to `d` by issuing the `__movsb` intrinsic
// (REP MOVSB). Selected when `_M_IX86` or `_M_X64` is defined.
static inline void _mi_memcpy_rep_movsb(void* d, const void* s, size_t n) {
  __movsb((unsigned char*)d, (const unsigned char*)s, n);
}
// Internal copies go through `_mi_memcpy`, which maps to the routine above.
#define _mi_memcpy _mi_memcpy_rep_movsb
#else
// Every other target: `_mi_memcpy` is simply the C library `memcpy`.
#define _mi_memcpy memcpy
#endif
/* -----------------------------------------------------------
Inlined definitions

View File

@ -137,7 +137,7 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne
memset((uint8_t*)newp + start, 0, newsize - start);
}
}
memcpy(newp, p, (newsize > size ? size : newsize));
_mi_memcpy(newp, p, (newsize > size ? size : newsize));
mi_free(p); // only free if successful
}
return newp;

View File

@ -103,7 +103,7 @@ mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noex
size_t size = (len+1)*sizeof(unsigned short);
unsigned short* p = (unsigned short*)mi_malloc(size);
if (p != NULL) {
memcpy(p,s,size);
_mi_memcpy(p,s,size);
}
return p;
}

View File

@ -628,7 +628,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
memset((uint8_t*)newp + start, 0, newsize - start);
}
memcpy(newp, p, (newsize > size ? size : newsize));
_mi_memcpy(newp, p, (newsize > size ? size : newsize));
mi_free(p); // only free if successful
}
return newp;
@ -695,7 +695,7 @@ mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_no
if (s == NULL) return NULL;
size_t n = strlen(s);
char* t = (char*)mi_heap_malloc(heap,n+1);
if (t != NULL) memcpy(t, s, n + 1);
if (t != NULL) _mi_memcpy(t, s, n + 1);
return t;
}
@ -711,7 +711,7 @@ mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n)
mi_assert_internal(m <= n);
char* t = (char*)mi_heap_malloc(heap, m+1);
if (t == NULL) return NULL;
memcpy(t, s, m);
_mi_memcpy(t, s, m);
t[m] = 0;
return t;
}

View File

@ -193,7 +193,7 @@ mi_heap_t* mi_heap_new(void) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap==NULL) return NULL;
memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
_mi_memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = bheap->tld;
heap->thread_id = _mi_thread_id();
_mi_random_split(&bheap->random, &heap->random);
@ -220,7 +220,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
#ifdef MI_MEDIUM_DIRECT
memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium));
#endif
memcpy(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
_mi_memcpy(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
heap->page_count = 0;
}

View File

@ -189,7 +189,7 @@ static bool _mi_heap_init(void) {
// OS allocated so already zero initialized
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
memcpy(heap, &_mi_heap_empty, sizeof(*heap));
_mi_memcpy(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id();
_mi_random_init(&heap->random);
heap->cookie = _mi_heap_random_next(heap) | 1;

View File

@ -194,7 +194,7 @@ static void mi_out_buf(const char* msg, void* arg) {
if (start+n >= MI_MAX_DELAY_OUTPUT) {
n = MI_MAX_DELAY_OUTPUT-start-1;
}
memcpy(&out_buf[start], msg, n);
_mi_memcpy(&out_buf[start], msg, n);
}
static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) {

View File

@ -115,7 +115,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
_mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
ctx_new->input[14] = (uint32_t)nonce;