Significantly faster memcpy/memmove/bcopy and memset/bzero

This commit is contained in:
dsl 2003-04-15 22:49:50 +00:00
parent 7c8e4cdaea
commit c327a133c6
5 changed files with 234 additions and 173 deletions

View File

@ -1,101 +1,5 @@
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from locore.s.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $NetBSD: bcopy.S,v 1.4 2003/04/15 22:49:50 dsl Exp $ */
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: bcopy.S,v 1.3 2002/07/10 06:02:09 kent Exp $")
#endif
/*
* (ov)bcopy (src,dst,cnt)
* ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
/*
 * Shared body for bcopy(), memcpy() and memmove().  MEMCOPY or MEMMOVE
 * selects the entry point; with neither defined this assembles as bcopy.
 * Every variant goes through the overlap check below.
 */
#ifdef MEMCOPY
ENTRY(memcpy)
#else
#ifdef MEMMOVE
ENTRY(memmove)
#else
ENTRY(bcopy)
#endif
#endif
pushl %esi
pushl %edi
/* two registers were pushed, so the arguments now start at 12(%esp) */
#if defined(MEMCOPY) || defined(MEMMOVE)
/* memcpy/memmove: first argument is the destination */
movl 12(%esp),%edi
movl 16(%esp),%esi
movl %edi,%eax /* return value */
#else
/* bcopy: first argument is the source */
movl 12(%esp),%esi
movl 16(%esp),%edi
#endif
movl 20(%esp),%ecx
/*
 * edx = dst - src.  If that is below the count (unsigned), the
 * destination starts inside the source range and a forward copy
 * would overwrite bytes that have not been read yet.
 */
movl %edi,%edx
subl %esi,%edx
cmpl %ecx,%edx /* overlapping? */
/* save the byte count in edx; movl does not disturb the flags from cmpl */
movl %ecx,%edx
jb 1f
cld /* nope, copy forwards. */
shrl $2,%ecx /* copy by words */
rep
movsl
movl %edx,%ecx
andl $3,%ecx /* any bytes left? */
rep
movsb
popl %edi
popl %esi
ret
1:
/* point both registers one past the end of their buffers */
addl %ecx,%edi /* copy backwards. */
addl %ecx,%esi
/* direction flag set: movsb/movsl now step downwards */
std
andl $3,%ecx /* any fractional bytes? */
/* step back onto the last byte of each buffer */
decl %edi
decl %esi
rep
movsb
movl %edx,%ecx /* copy remainder by words */
shrl $2,%ecx
/* move from the last uncopied byte to the start of the 4-byte word holding it */
subl $3,%esi
subl $3,%edi
rep
movsl
popl %edi
popl %esi
/* clear the direction flag again before returning */
cld
ret
/*
 * bcopy() is assembled from memcpy.S with the overlap check compiled in.
 * NO_OVERLAP must stay undefined here: with it set, memcpy.S omits the
 * backwards path and always copies forwards, which corrupts moves where
 * the destination starts inside the source range.
 */
#define BCOPY
#include "memcpy.S"

View File

@ -1,46 +1,10 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
/* $NetBSD: bzero.S,v 1.7 2003/04/15 22:49:50 dsl Exp $ */
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: bzero.S,v 1.6 1998/02/22 08:14:57 mycroft Exp $")
RCSID("$NetBSD: bzero.S,v 1.7 2003/04/15 22:49:50 dsl Exp $")
#endif
/*
 * bzero: zero a buffer.  The first stack argument is the buffer
 * address, the second the byte count.  %edi is saved and restored;
 * %eax, %ecx and %edx are used as scratch.
 */
ENTRY(bzero)
pushl %edi
/* one register was pushed, so the arguments now start at 8(%esp) */
movl 8(%esp),%edi
movl 12(%esp),%edx
cld /* set fill direction forward */
xorl %eax,%eax /* set fill data to 0 */
/*
* if the string is too short, it's really not worth the overhead
* of aligning to word boundaries, etc. So we jump to a plain
* unaligned set.
*/
cmpl $16,%edx
jb L1
/* ecx = (-edi) & 3: number of bytes up to the next 4-byte boundary */
movl %edi,%ecx /* compute misalignment */
negl %ecx
andl $3,%ecx
/* edx = bytes still to do once the alignment bytes are written */
subl %ecx,%edx
rep /* zero until word aligned */
stosb
movl %edx,%ecx /* zero by words */
shrl $2,%ecx
/* keep only the 0-3 trailing bytes in edx for the final byte fill */
andl $3,%edx
rep
stosl
L1: movl %edx,%ecx /* zero remainder by bytes */
rep
stosb
popl %edi
ret
#define BZERO
#include "memset.S"

View File

@ -1,4 +1,136 @@
/* $NetBSD: memcpy.S,v 1.2 1998/02/22 08:14:58 mycroft Exp $ */
/* $NetBSD: memcpy.S,v 1.3 2003/04/15 22:49:50 dsl Exp $ */
#define MEMCOPY
#include "bcopy.S"
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
*
* This code is derived from locore.s.
* Optimised by David Laight 2003
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: memcpy.S,v 1.3 2003/04/15 22:49:50 dsl Exp $")
#endif
/*
* (ov)bcopy (src,dst,cnt)
* ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
/*
 * Common body for memcpy(), memmove() and bcopy().  The including file
 * selects the entry point with BCOPY or MEMMOVE; with neither defined
 * this assembles as memcpy (which also defines NO_OVERLAP).
 *
 *	memcpy/memmove:	arguments are (dst, src, len); dst is returned in %eax
 *	bcopy:		arguments are (src, dst, len); no return value is set
 *
 * Register use:
 *	%esi	source pointer (saved on the stack)
 *	%edi	destination pointer (caller's value parked in %edx)
 *	%ecx	byte count, then word count for rep movsl
 *	%eax	copy of the byte count; low two bits = odd bytes to copy
 *
 * Unless NO_OVERLAP is defined, a destination that starts inside the
 * source range is copied backwards so no byte is overwritten before it
 * has been read.
 */
#ifdef BCOPY
ENTRY(bcopy)
#else
#ifdef MEMMOVE
ENTRY(memmove)
#else
#define NO_OVERLAP
ENTRY(memcpy)
#endif
#endif
	push	%esi
	mov	%edi,%edx		/* park caller's %edi; restored at done */
	/* one register was pushed, so the arguments now start at 8(%esp) */
#if defined(BCOPY)
	movl	8(%esp),%esi
	movl	12(%esp),%edi
#else
	movl	8(%esp),%edi
	movl	12(%esp),%esi
#endif
	movl	16(%esp),%ecx
#if defined(NO_OVERLAP)
	movl	%ecx,%eax
#else
	/*
	 * eax = dst - src.  Below the count (unsigned) means dst starts
	 * inside the source range, so the copy has to run backwards.
	 */
	movl	%edi,%eax
	subl	%esi,%eax
	cmpl	%ecx,%eax		/* overlapping? */
	movl	%ecx,%eax		/* movl keeps the flags from cmpl */
	jb	backwards
#endif
	cld				/* nope, copy forwards. */
	shrl	$2,%ecx			/* copy by words */
	rep
	movsl
	and	$3,%eax			/* any bytes left? */
	jnz	trailing
done:
	/*
	 * Everything except bcopy returns the destination.  The test is
	 * keyed on BCOPY because the plain memcpy build is the default
	 * branch above and defines no selector macro of its own; after
	 * the single push the first argument (dst) is at 8(%esp).
	 */
#if !defined(BCOPY)
	movl	8(%esp),%eax
#endif
	mov	%edx,%edi
	pop	%esi
	ret

	/* Forward tail: eax = 1, 2 or 3 bytes left at (%esi) / (%edi). */
trailing:
	cmp	$2,%eax
	jb	1f
	movw	(%esi),%ax		/* moves do not alter the cmp flags */
	movw	%ax,(%edi)
	je	done			/* exactly two bytes */
	movb	2(%esi),%al		/* third byte */
	movb	%al,2(%edi)
	jmp	done
1:	movb	(%esi),%al
	movb	%al,(%edi)
	jmp	done

#if !defined(NO_OVERLAP)
backwards:
	addl	%ecx,%edi		/* copy backwards. */
	addl	%ecx,%esi
	and	$3,%eax			/* any fractional bytes? */
	jnz	back_align
back_aligned:
	shrl	$2,%ecx
	subl	$4,%esi			/* start of the last whole word */
	subl	$4,%edi
	std				/* string ops step downwards */
	rep
	movsl
	cld				/* restore forward direction */
	jmp	done

	/*
	 * Copy the 1-3 odd bytes at the top of the buffers first, highest
	 * address first, leaving %esi/%edi just past the last whole word.
	 */
back_align:
	sub	%eax,%esi
	sub	%eax,%edi
	cmp	$2,%eax
	jb	1f
	je	2f
	movb	2(%esi),%al
	movb	%al,2(%edi)
2:	movw	(%esi),%ax
	movw	%ax,(%edi)
	jmp	back_aligned
1:	movb	(%esi),%al
	movb	%al,(%edi)
	jmp	back_aligned
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: memmove.S,v 1.1 1998/08/04 01:41:26 perry Exp $ */
/* $NetBSD: memmove.S,v 1.2 2003/04/15 22:49:50 dsl Exp $ */
#define MEMMOVE
#include "bcopy.S"
#include "memcpy.S"

View File

@ -1,58 +1,119 @@
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
/* $NetBSD: memset.S,v 1.5 2003/04/15 22:49:50 dsl Exp $ */
/*-
* Copyright (c) 2003 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by David Laight.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: memset.S,v 1.4 1998/02/22 08:14:58 mycroft Exp $")
RCSID("$NetBSD: memset.S,v 1.5 2003/04/15 22:49:50 dsl Exp $")
#endif
/*
 * NOTE(review): this span looks like two revisions of the routine
 * interleaved line by line (for example "jle L1" directly followed by
 * "jbe by_bytes", arguments fetched both at 8/12(%esp) and at
 * 12/16/20(%esp), and instructions after the first "ret") -- confirm
 * which lines belong to the current revision before assembling.
 */
#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
#endif
/* nothing has been pushed yet: the first argument is at 4(%esp) */
#ifdef BZERO
movl 8(%esp),%ecx
xor %eax,%eax
#else
movl 12(%esp),%ecx
movzbl 8(%esp),%eax /* unsigned char, zero extend */
#endif
#ifndef LIBKERN_OPTIMISE_SPACE
cmpl $0x0f,%ecx /* avoid mispredicted branch... */
#endif
pushl %edi
pushl %ebx
movl 12(%esp),%edi
movzbl 16(%esp),%eax /* unsigned char, zero extend */
movl 20(%esp),%ecx
pushl %edi /* push address of buffer */
movl 8(%esp),%edi
cld /* set fill direction forward */
#ifndef LIBKERN_OPTIMISE_SPACE
/*
* if the string is too short, it's really not worth the overhead
* of aligning to word boundaries, etc. So we jump to a plain
* unaligned set.
*
* NB aligning the transfer is actually pointless on my athlon 700,
* It does make a difference to a PII though.
*
* The PII, PIII and PIV all seem to have a massive performance
* drop when the initial target address is an odd multiple of 4.
*/
cmpl $0x0f,%ecx
jle L1
jbe by_bytes
#ifndef BZERO
/* al -> ax -> eax: replicate the fill byte into all four bytes */
movb %al,%ah /* copy char to all bytes in word */
movl %eax,%edx
sall $16,%eax
orl %edx,%eax
#endif
movl %edi,%edx /* compute misalignment */
negl %edx
andl $3,%edx
movl %ecx,%ebx
subl %edx,%ebx
movl %edx,%ecx /* set until word aligned */
rep
stosb
movl %ebx,%ecx
shrl $2,%ecx /* set by words */
/* edx = (-edi) & 7: bytes up to the next 8-byte boundary */
movl %edi,%edx /* detect misalignment */
neg %edx
andl $7,%edx
jnz align
aligned:
/* store the final 4 bytes now; the word loop below covers count/4 words */
movl %eax,-4(%edi,%ecx) /* zap last 4 bytes */
shrl $2,%ecx /* zero by words */
rep
stosl
done:
#ifndef BZERO
movl 8(%esp),%eax /* return address of buffer */
#endif
pop %edi
ret
movl %ebx,%ecx /* set remainder by bytes */
andl $3,%ecx
L1: rep
align:
/* fill the first 8 bytes directly, then skip edx bytes forward */
movl %eax,(%edi) /* zap first 8 bytes */
movl %eax,4(%edi)
subl %edx,%ecx /* remove from main count */
add %edx,%edi
jmp aligned
by_bytes:
#endif /* LIBKERN_OPTIMISE_SPACE */
rep
stosb
popl %eax /* pop address of buffer */
popl %ebx
#ifndef BZERO
movl 8(%esp),%eax /* return address of buffer */
#endif
popl %edi
ret