Optimized memcpy/memset for x86_64.
This commit is contained in:
parent
6497f6b1ec
commit
5234e66d32
@ -113,15 +113,12 @@ extern int memcpy_generic_end;
|
||||
extern "C" void memset_generic(void* dest, int value, size_t count);
|
||||
extern int memset_generic_end;
|
||||
|
||||
// TODO x86_64
|
||||
#ifndef __x86_64__
|
||||
// Table of the memcpy/memset implementations currently in use, initialized
// here with the generic versions. The memcpy and memset entry points in
// arch_string.S jump indirectly through entries of this table.
// Each function pointer is followed by the address of the matching *_end
// symbol, which arch_string.S emits directly after the routine's last
// instruction.
// NOTE(review): presumably the *_end markers let other code determine the
// size of each routine -- confirm against the users of this table.
x86_optimized_functions gOptimizedFunctions = {
|
||||
memcpy_generic,
|
||||
&memcpy_generic_end,
|
||||
memset_generic,
|
||||
&memset_generic_end
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
static status_t
|
||||
|
@ -1,5 +1,9 @@
|
||||
SubDir HAIKU_TOP src system kernel lib arch x86_64 ;
|
||||
|
||||
# find the generated asm_offsets.h
|
||||
SubDirHdrs [ FDirName $(TARGET_COMMON_DEBUG_OBJECT_DIR) system kernel arch
|
||||
$(TARGET_KERNEL_ARCH) ] ;
|
||||
|
||||
SEARCH_SOURCE += [ FDirName $(SUBDIR) $(DOTDOT) generic ] ;
|
||||
|
||||
local librootSources = [ FDirName $(HAIKU_TOP) src system libroot ] ;
|
||||
@ -24,7 +28,12 @@ KernelMergeObject kernel_lib_posix_arch_$(TARGET_ARCH).o :
|
||||
kernel_longjmp_return.c
|
||||
kernel_setjmp_save_sigs.c
|
||||
|
||||
arch_string.cpp
|
||||
arch_string.S
|
||||
|
||||
: $(TARGET_KERNEL_PIC_CCFLAGS)
|
||||
;
|
||||
|
||||
# Explicitly tell the build system that arch_string.S includes the generated
|
||||
# asm_offsets.h.
|
||||
Includes [ FGristFiles arch_string.S ]
|
||||
: <src!system!kernel!arch!x86>asm_offsets.h ;
|
||||
|
96
src/system/kernel/lib/arch/x86_64/arch_string.S
Normal file
96
src/system/kernel/lib/arch/x86_64/arch_string.S
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
|
||||
* Distributed under the terms of the MIT License.
|
||||
*/
|
||||
|
||||
|
||||
#include <asm_defs.h>
|
||||
|
||||
#include "asm_offsets.h"
|
||||
|
||||
|
||||
// memcpy_generic: copies %rdx bytes from the buffer at %rsi to the buffer at
// %rdi and returns the original destination address in %rax.
// Strategy: copies of fewer than 24 bytes, and copies whose source and
// destination differ in the low three address bits, are done entirely with
// "rep movsb". Otherwise the routine byte-copies up to the next 8-byte
// boundary, moves the bulk with "rep movsq" and finishes the tail bytewise.
// Scratch registers: %rcx (rep counter) and %r8.
// NOTE(review): the rep string instructions advance according to the
// direction flag; this code presumably relies on DF being clear on entry --
// confirm against the callers/ABI.
.align 8
|
||||
FUNCTION(memcpy_generic):
|
||||
// Frame pointer setup; the routine itself uses no stack storage.
push %rbp
|
||||
movq %rsp, %rbp
|
||||
|
||||
// Preserve original destination address for return value.
|
||||
movq %rdi, %rax
|
||||
|
||||
// size -> %rcx
|
||||
movq %rdx, %rcx
|
||||
|
||||
// For small copies, always do it bytewise, the additional overhead is
|
||||
// not worth it.
|
||||
// NOTE(review): jl is a signed comparison while the count is presumably an
// unsigned size_t; a count with the top bit set would take the bytewise
// path. The copied data is the same either way, but jb would match the
// unsigned type -- confirm before changing.
cmp $24, %rcx
|
||||
jl .Lmemcpy_generic_byte_copy
|
||||
|
||||
// Do both source and dest have the same alignment?
|
||||
// %r8 = source ^ dest; its low three bits are zero only when both pointers
// have the same offset within an 8-byte word.
movq %rsi, %r8
|
||||
xorq %rdi, %r8
|
||||
test $7, %r8
|
||||
jnz .Lmemcpy_generic_byte_copy
|
||||
|
||||
// Align up to an 8-byte boundary.
|
||||
// %r8 = dest & 7; zero means the destination is already aligned and %rcx
// still holds the full count.
movq %rdi, %r8
|
||||
andq $7, %r8
|
||||
jz .Lmemcpy_generic_qword_copy
|
||||
// %rcx = 8 - (dest & 7): number of bytes needed to reach the boundary.
movq $8, %rcx
|
||||
subq %r8, %rcx
|
||||
subq %rcx, %rdx // Subtract from the overall count.
|
||||
rep
|
||||
movsb
|
||||
|
||||
// Reload the remaining count (%rdx was already reduced by the alignment
// bytes copied above).
|
||||
movq %rdx, %rcx
|
||||
.Lmemcpy_generic_qword_copy:
|
||||
// Move by quadwords: %rcx = remaining count / 8.
|
||||
shrq $3, %rcx
|
||||
rep
|
||||
movsq
|
||||
|
||||
// Get the remaining count: the low three bits of %rdx are the tail bytes.
|
||||
movq %rdx, %rcx
|
||||
andq $7, %rcx
|
||||
.Lmemcpy_generic_byte_copy:
|
||||
// Move any remaining data by bytes. When reached through one of the early
// jumps above, %rcx still holds the full count.
|
||||
rep
|
||||
movsb
|
||||
|
||||
pop %rbp
|
||||
ret
|
||||
FUNCTION_END(memcpy_generic)
|
||||
// Marks the address directly after the routine; referenced from the
// gOptimizedFunctions initializer as &memcpy_generic_end.
SYMBOL(memcpy_generic_end):
|
||||
|
||||
|
||||
// memset_generic: stores the low byte of %esi into %rdx consecutive bytes
// starting at %rdi, using a single "rep stosb", and returns the original
// destination address in %rax. %rcx and %r8 are used as scratch.
// NOTE(review): the C++ side declares memset_generic as returning void, but
// the routine does return dest in %rax; the memset entry point jumps here
// directly, so that return value is presumably what memset callers see --
// confirm.
// NOTE(review): "rep stosb" advances according to the direction flag; this
// presumably relies on DF being clear on entry -- confirm.
.align 8
|
||||
FUNCTION(memset_generic):
|
||||
// Frame pointer setup; the routine itself uses no stack storage.
push %rbp
|
||||
movq %rsp, %rbp
|
||||
|
||||
// Preserve original destination address for return value.
|
||||
// (%rax cannot be used for this because stosb needs the fill value in %al.)
movq %rdi, %r8
|
||||
|
||||
// size -> %rcx, value -> %al
|
||||
movq %rdx, %rcx
|
||||
movl %esi, %eax
|
||||
|
||||
// Store %al to [%rdi] %rcx times, one byte per iteration.
|
||||
rep
|
||||
stosb
|
||||
|
||||
// Return the saved destination address.
movq %r8, %rax
|
||||
pop %rbp
|
||||
ret
|
||||
FUNCTION_END(memset_generic)
|
||||
// Marks the address directly after the routine; referenced from the
// gOptimizedFunctions initializer as &memset_generic_end.
SYMBOL(memset_generic_end):
|
||||
|
||||
|
||||
// memcpy: entry point that jumps indirectly through the pointer stored at
// gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memcpy. Because this is a jmp
// and not a call, the argument registers and the return address are left
// untouched and the selected implementation returns straight to the caller.
// NOTE(review): the offset constant presumably comes from the generated
// asm_offsets.h included at the top of this file -- confirm.
FUNCTION(memcpy):
|
||||
jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memcpy)
|
||||
FUNCTION_END(memcpy)
|
||||
|
||||
// memset: entry point that jumps indirectly through the pointer stored at
// gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memset. Because this is a jmp
// and not a call, the argument registers and the return address are left
// untouched and the selected implementation returns straight to the caller.
// NOTE(review): the offset constant presumably comes from the generated
// asm_offsets.h included at the top of this file -- confirm.
FUNCTION(memset):
|
||||
jmp *(gOptimizedFunctions + X86_OPTIMIZED_FUNCTIONS_memset)
|
||||
FUNCTION_END(memset)
|
||||
|
@ -1,36 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
|
||||
* Distributed under the terms of the MIT License.
|
||||
*/
|
||||
|
||||
// TODO: Replace these with optimized implementations.
|
||||
|
||||
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/*!	Copies \a count bytes from \a src to \a dest, one byte at a time in
	ascending address order.
	\param dest Start of the destination buffer.
	\param src Start of the source buffer.
	\param count Number of bytes to copy; 0 copies nothing.
	\return \a dest, unchanged.
*/
void *
memcpy(void *dest, const void *src, size_t count)
{
	unsigned char *out = static_cast<unsigned char *>(dest);
	const unsigned char *in = static_cast<const unsigned char *>(src);

	// Plain forward byte copy, lowest address first.
	for (size_t index = 0; index < count; index++)
		out[index] = in[index];

	return dest;
}
|
||||
|
||||
|
||||
/*!	Fills \a count bytes starting at \a dest with \a val converted to
	unsigned char, one byte at a time.
	\param dest Start of the buffer to fill.
	\param val Fill value; only its low byte is stored.
	\param count Number of bytes to fill; 0 writes nothing.
	\return \a dest, unchanged.
*/
void *
memset(void *dest, int val, size_t count)
{
	const unsigned char fill = static_cast<unsigned char>(val);
	unsigned char *cursor = static_cast<unsigned char *>(dest);
	unsigned char *const end = cursor + count;

	// Walk the buffer from the first byte up to (not including) end.
	while (cursor != end)
		*cursor++ = fill;

	return dest;
}
|
Loading…
Reference in New Issue
Block a user