Optimized memcpy/memset for x86_64.

Alex Smith 2012-07-21 11:55:13 +01:00
parent 6497f6b1ec
commit 5234e66d32
4 changed files with 106 additions and 40 deletions

View File

@@ -113,15 +113,12 @@ extern int memcpy_generic_end;
extern "C" void memset_generic(void* dest, int value, size_t count);
extern int memset_generic_end;
// TODO x86_64
#ifndef __x86_64__
x86_optimized_functions gOptimizedFunctions = {
	memcpy_generic,
	&memcpy_generic_end,
	memset_generic,
	&memset_generic_end
};
#endif
static status_t

View File

@@ -1,5 +1,9 @@
SubDir HAIKU_TOP src system kernel lib arch x86_64 ;

# find the generated asm_offsets.h
SubDirHdrs [ FDirName $(TARGET_COMMON_DEBUG_OBJECT_DIR) system kernel arch
	$(TARGET_KERNEL_ARCH) ] ;

SEARCH_SOURCE += [ FDirName $(SUBDIR) $(DOTDOT) generic ] ;

local librootSources = [ FDirName $(HAIKU_TOP) src system libroot ] ;
@@ -24,7 +28,12 @@ KernelMergeObject kernel_lib_posix_arch_$(TARGET_ARCH).o :
	kernel_longjmp_return.c
	kernel_setjmp_save_sigs.c

	arch_string.cpp
	arch_string.S
	: $(TARGET_KERNEL_PIC_CCFLAGS)
;

# Explicitly tell the build system that arch_string.S includes the generated
# asm_offsets.h.
Includes [ FGristFiles arch_string.S ]
	: <src!system!kernel!arch!x86>asm_offsets.h ;

View File

@@ -0,0 +1,96 @@
/*
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */

#include <asm_defs.h>
#include "asm_offsets.h"
.align 8
FUNCTION(memcpy_generic):
	push %rbp
	movq %rsp, %rbp

	// Preserve original destination address for return value.
	movq %rdi, %rax

	// size -> %rcx
	movq %rdx, %rcx

	// For small copies, always do it bytewise; the additional overhead is
	// not worth it.
	cmp $24, %rcx
	jl .Lmemcpy_generic_byte_copy

	// Do both source and dest have the same alignment?
	movq %rsi, %r8
	xorq %rdi, %r8
	test $7, %r8
	jnz .Lmemcpy_generic_byte_copy

	// Align up to an 8-byte boundary.
	movq %rdi, %r8
	andq $7, %r8
	jz .Lmemcpy_generic_qword_copy
	movq $8, %rcx
	subq %r8, %rcx
	subq %rcx, %rdx		// Subtract from the overall count.
	rep
	movsb

	// Get back the original count value.
	movq %rdx, %rcx

.Lmemcpy_generic_qword_copy:
	// Move by quadwords.
	shrq $3, %rcx
	rep
	movsq

	// Get the remaining count.
	movq %rdx, %rcx
	andq $7, %rcx

.Lmemcpy_generic_byte_copy:
	// Move any remaining data by bytes.
	rep
	movsb

	pop %rbp
	ret
FUNCTION_END(memcpy_generic)
SYMBOL(memcpy_generic_end):
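
For readability, here is a rough C-level sketch of the strategy memcpy_generic implements. The commit itself adds only the assembly above; the helper name memcpy_generic_sketch and this C code are illustrative, with the quadword loop standing in for the "rep movsq" sequence and the byte loops for "rep movsb".

#include <stddef.h>
#include <stdint.h>

// Illustrative only: a C rendering of memcpy_generic's logic.
static void*
memcpy_generic_sketch(void* dest, const void* src, size_t count)
{
	uint8_t* d = (uint8_t*)dest;
	const uint8_t* s = (const uint8_t*)src;

	// Take the quadword path only for copies of at least 24 bytes whose
	// source and destination share the same alignment modulo 8.
	if (count >= 24 && (((uintptr_t)d ^ (uintptr_t)s) & 7) == 0) {
		// Copy bytes until the destination is 8-byte aligned.
		while (((uintptr_t)d & 7) != 0) {
			*d++ = *s++;
			count--;
		}

		// Move whole quadwords ("rep movsq").
		uint64_t* d64 = (uint64_t*)d;
		const uint64_t* s64 = (const uint64_t*)s;
		for (size_t i = 0; i < count / 8; i++)
			*d64++ = *s64++;

		d = (uint8_t*)d64;
		s = (const uint8_t*)s64;
		count &= 7;
	}

	// Small, unaligned, or leftover data is moved bytewise ("rep movsb").
	while (count-- != 0)
		*d++ = *s++;

	return dest;
}

The 24-byte threshold and the XOR-based alignment check correspond to the cmp/test instructions at the top of the assembly function.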
.align 8
FUNCTION(memset_generic):
	push %rbp
	movq %rsp, %rbp

	// Preserve original destination address for return value.
	movq %rdi, %r8

	// size -> %rcx, value -> %al
	movq %rdx, %rcx
	movl %esi, %eax

	// Move by bytes.
	rep
	stosb

	movq %r8, %rax
	pop %rbp
	ret
FUNCTION_END(memset_generic)
SYMBOL(memset_generic_end):


FUNCTION(memcpy):
	jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memcpy)
FUNCTION_END(memcpy)


FUNCTION(memset):
	jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memset)
FUNCTION_END(memset)
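
The exported memcpy and memset are just indirect jumps through gOptimizedFunctions, the function-pointer table initialized in the first hunk of this commit, with X86_OPTIMIZED_FUNCTIONS_memcpy and X86_OPTIMIZED_FUNCTIONS_memset being offsets from the generated asm_offsets.h. Conceptually the dispatch amounts to the hypothetical C sketch below; the struct and field names are illustrative rather than the actual x86_optimized_functions definition, and the *_end pointers presumably exist so the kernel can determine each implementation's size.

#include <stddef.h>

// Illustrative shape of the table initialized in the first hunk.
struct optimized_functions_sketch {
	void* (*memcpy_func)(void* dest, const void* src, size_t count);
	void* memcpy_end;	// first address past the implementation
	void* (*memset_func)(void* dest, int value, size_t count);
	void* memset_end;
};

extern struct optimized_functions_sketch gOptimizedFunctions;

// What the "jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memcpy)"
// stub amounts to: every call is forwarded to whichever implementation
// is currently installed in the table (memcpy_generic by default).
void*
memcpy(void* dest, const void* src, size_t count)
{
	return gOptimizedFunctions.memcpy_func(dest, src, count);
}

Because callers always go through the table, a CPU-specific implementation can be swapped in later without relinking anything that calls memcpy or memset.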

View File

@@ -1,36 +0,0 @@
/*
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */

// TODO: Replace these with optimized implementations.
#include <string.h>
void *
memcpy(void *dest, const void *src, size_t count)
{
	const unsigned char *s = reinterpret_cast<const unsigned char *>(src);
	unsigned char *d = reinterpret_cast<unsigned char *>(dest);

	for (; count != 0; count--) {
		*d++ = *s++;
	}

	return dest;
}


void *
memset(void *dest, int val, size_t count)
{
	unsigned char *d = reinterpret_cast<unsigned char *>(dest);

	for (; count != 0; count--) {
		*d++ = static_cast<unsigned char>(val);
	}

	return dest;
}