Assembly version of string functions.

Originally from SHIMIZU Ryo.
This commit is contained in:
itohy 2002-11-20 14:23:54 +00:00
parent 6e73936f81
commit 6cde54195b
6 changed files with 583 additions and 2 deletions

View File

@ -1,6 +1,7 @@
# $NetBSD: Makefile.inc,v 1.2 2002/08/24 06:39:48 itohy Exp $
# $NetBSD: Makefile.inc,v 1.3 2002/11/20 14:23:54 itohy Exp $
SRCS+= bcmp.c bcopy.c bzero.c ffs.S index.c memchr.c memcmp.c memset.c \
SRCS+= bcmp.c bcopy.S bzero.S ffs.S index.c memchr.c memcmp.c memset.S \
rindex.c strcat.c strcmp.c strcpy.c strcspn.c strlen.c \
strncat.c strncmp.c strncpy.c strpbrk.c strsep.c \
strspn.c strstr.c swab.c
SRCS+= memcpy.S memmove.S

View File

@ -0,0 +1,269 @@
/* $NetBSD: bcopy.S,v 1.1 2002/11/20 14:23:54 itohy Exp $ */
/*
* Copyright (c) 2000 SHIMIZU Ryo <ryo@misakimix.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: bcopy.S,v 1.1 2002/11/20 14:23:54 itohy Exp $")
#endif
/*
 * Shared body for bcopy(), memcpy() and memmove().
 *
 * A wrapper defines MEMCOPY or MEMMOVE before including this file to get
 * memcpy or memmove; with neither defined the entry point is bcopy.
 * All three run the same code: the copy direction is chosen from the
 * relative position of src and dst, so overlapping buffers are handled
 * in every variant.
 *
 * Register use:
 *   REG_SRC   source pointer       (r5 for memcpy/memmove, r4 for bcopy)
 *   REG_DST   destination pointer  (r4 for memcpy/memmove, r5 for bcopy)
 *   REG_LEN   byte count           (r6)
 *   REG_DST0  copy of the original dst, moved to r0 on return
 *             (r3; memcpy/memmove only)
 *   r0, r1    scratch
 *
 * The instruction that follows bt/s, bf/s, bra and rts is a delay slot:
 * it is executed before control reaches the branch target.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define REG_DST0 r3
#define REG_SRC r5
#define REG_DST r4
#else
#define REG_SRC r4
#define REG_DST r5
#endif
#define REG_LEN r6
#ifdef MEMCOPY
ENTRY(memcpy)
#else
#ifdef MEMMOVE
ENTRY(memmove)
#else
ENTRY(bcopy)
#endif
#endif
#ifdef REG_DST0
mov REG_DST,REG_DST0 /* keep the original dst for the return value */
#endif
cmp/eq REG_DST,REG_SRC /* if ( src == dst ) return; */
bt/s bcopy_return
cmp/hi REG_DST,REG_SRC /* (delay slot) T = ( src > dst ), unsigned */
bf/s bcopy_overlap /* src < dst: copy backward, starting at the end */
mov REG_SRC,r0 /* (delay slot) */
/*
 * Forward copy (src > dst).  The widest usable access depends on how
 * the alignments of src and dst relate:
 *   (src ^ dst) & 3 == 0   longword (32-bit) copy
 *   (src ^ dst) & 3 == 2   word (16-bit) copy
 *   (src ^ dst) & 1 != 0   byte copy
 */
xor REG_DST,r0
and #3,r0
mov r0,r1 /* r1 = (src ^ dst) & 3, tested again at word_align */
tst r0,r0 /* (src ^ dst) & 3 */
bf/s word_align
longword_align:
/* the tst below is also the delay slot of the bf/s above */
tst REG_LEN,REG_LEN /* if ( len==0 ) return; */
bt/s bcopy_return
mov REG_SRC,r0 /* (delay slot) */
tst #1,r0 /* if ( src & 1 ) */
bt 1f
mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
add #-1,REG_LEN
mov.b r0,@REG_DST
add #1,REG_DST
1:
mov #1,r0
cmp/hi r0,REG_LEN /* if ( (len > 1) && */
bf/s 1f
mov REG_SRC,r0 /* (delay slot) */
tst #2,r0 /* (src & 2) { */
bt 1f
mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
add #-2,REG_LEN /* len -= 2; */
mov.w r0,@REG_DST
add #2,REG_DST /* } */
1:
mov #3,r1
cmp/hi r1,REG_LEN /* while ( len > 3 ) { */
bf/s no_align_delay
tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) for no_align_delay */
2:
mov.l @REG_SRC+,r0 /* *((unsigned long*)dst)++ = *((unsigned long*)src)++; */
add #-4,REG_LEN /* len -= 4; */
mov.l r0,@REG_DST
cmp/hi r1,REG_LEN
bt/s 2b
add #4,REG_DST /* } */
/* 0 to 3 bytes are left; finish them in the byte loop */
bra no_align_delay
tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
word_align:
/* entered with r1 = (src ^ dst) & 3, which is non-zero here */
mov r1,r0
tst #1,r0
bf/s no_align_delay /* src and dst differ in bit 0: bytes only */
tst REG_LEN,REG_LEN /* if ( len == 0 ) return; */
bt bcopy_return
mov REG_SRC,r0 /* if ( src & 1 ) */
tst #1,r0
bt 1f
mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
add #-1,REG_LEN
mov.b r0,@REG_DST
add #1,REG_DST
1:
mov #1,r1
cmp/hi r1,REG_LEN /* while ( len > 1 ) { */
bf/s no_align_delay
tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
2:
mov.w @REG_SRC+,r0 /* *((unsigned short*)dst)++ = *((unsigned short*)src)++; */
add #-2,REG_LEN /* len -= 2; */
mov.w r0,@REG_DST
cmp/hi r1,REG_LEN
bt/s 2b
add #2,REG_DST /* } */
/* the word loop falls through here with 0 or 1 byte left */
no_align:
tst REG_LEN,REG_LEN /* while ( len != 0 ) { */
no_align_delay:
/* reached with T = ( len == 0 ) already computed */
bt bcopy_return
1:
mov.b @REG_SRC+,r0 /* *dst++ = *src++; */
add #-1,REG_LEN /* len--; */
mov.b r0,@REG_DST
tst REG_LEN,REG_LEN
bf/s 1b
add #1,REG_DST /* } */
bcopy_return:
/* memcpy/memmove return the original dst in r0; bcopy sets no result */
rts
#ifdef REG_DST0
mov REG_DST0,r0
#else
nop
#endif
/*
 * Backward copy, taken from the entry code when src is below dst.
 * Both pointers are first advanced to one past the end of their buffers
 * and every access pre-decrements, so bytes are moved from the highest
 * address down.  The alignment tests mirror the forward copy but are
 * applied to the end pointers:
 *   (src ^ dst) & 3 == 0   longword (32-bit) copy
 *   (src ^ dst) & 3 == 2   word (16-bit) copy
 *   (src ^ dst) & 1 != 0   byte copy
 * The instruction that follows bt/s, bf/s, bra and rts is a delay slot.
 */
bcopy_overlap:
add REG_LEN,REG_SRC /* src += len; */
add REG_LEN,REG_DST /* dst += len; */
mov REG_SRC,r0
xor REG_DST,r0
and #3,r0
mov r0,r1 /* r1 = (src ^ dst) & 3, tested again at ov_word_align */
tst r0,r0 /* (src ^ dst) & 3 */
bf/s ov_word_align
ov_longword_align:
/* the tst below is also the delay slot of the bf/s above */
tst REG_LEN,REG_LEN /* if ( len==0 ) return; */
bt/s bcopy_return
mov REG_SRC,r0 /* (delay slot) */
tst #1,r0 /* if ( src & 1 ) */
bt 1f
add #-1,REG_SRC /* *--dst = *--src; */
mov.b @REG_SRC,r0
mov.b r0,@-REG_DST
add #-1,REG_LEN
1:
mov #1,r0
cmp/hi r0,REG_LEN /* if ( (len > 1) && */
bf/s 1f
mov REG_SRC,r0 /* (delay slot) */
tst #2,r0 /* (src & 2) { */
bt 1f
add #-2,REG_SRC /* *--((unsigned short*)dst) = *--((unsigned short*)src); */
mov.w @REG_SRC,r0
add #-2,REG_LEN /* len -= 2; */
mov.w r0,@-REG_DST /* } */
1:
mov #3,r1
cmp/hi r1,REG_LEN /* while ( len > 3 ) { */
bf/s ov_no_align_delay
tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
2:
add #-4,REG_SRC
mov.l @REG_SRC,r0 /* *--((unsigned long*)dst) = *--((unsigned long*)src); */
add #-4,REG_LEN /* len -= 4; */
cmp/hi r1,REG_LEN
bt/s 2b
mov.l r0,@-REG_DST /* } (delay slot: the store is done on every pass) */
/* 0 to 3 bytes are left; finish them in the byte loop */
bra ov_no_align_delay
tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
ov_word_align:
/* entered with r1 = (src ^ dst) & 3, which is non-zero here */
mov r1,r0
tst #1,r0
bf/s ov_no_align_delay /* src and dst differ in bit 0: bytes only */
tst REG_LEN,REG_LEN /* if ( len == 0 ) return; */
bt bcopy_return
mov REG_SRC,r0 /* if ( src & 1 ) */
tst #1,r0
bt 1f
add #-1,REG_SRC
mov.b @REG_SRC,r0 /* *--dst = *--src; */
add #-1,REG_LEN
mov.b r0,@-REG_DST
1:
mov #1,r1
cmp/hi r1,REG_LEN /* while ( len > 1 ) { */
bf/s ov_no_align_delay
tst REG_LEN,REG_LEN /* (delay slot) T = ( len == 0 ) */
2:
add #-2,REG_SRC
mov.w @REG_SRC,r0 /* *--((unsigned short*)dst) = *--((unsigned short*)src); */
add #-2,REG_LEN /* len -= 2; */
cmp/hi r1,REG_LEN
bt/s 2b
mov.w r0,@-REG_DST /* } (delay slot: the store is done on every pass) */
/* the word loop falls through here with 0 or 1 byte left */
ov_no_align:
tst REG_LEN,REG_LEN /* while ( len != 0 ) { */
ov_no_align_delay:
/* reached with T = ( len == 0 ) already computed */
bt 9f
1:
add #-1,REG_SRC
mov.b @REG_SRC,r0 /* *--dst = *--src; */
add #-1,REG_LEN /* len--; */
tst REG_LEN,REG_LEN
bf/s 1b
mov.b r0,@-REG_DST /* } */
9:
/* same return sequence as bcopy_return */
rts
#ifdef REG_DST0
mov REG_DST0,r0
#else
nop
#endif

View File

@ -0,0 +1,5 @@
/* $NetBSD: bzero.S,v 1.1 2002/11/20 14:23:54 itohy Exp $ */
/* bzero() is assembled from memset.S; BZERO selects the bzero entry point. */
#define BZERO
#include "memset.S"

View File

@ -0,0 +1,4 @@
/* $NetBSD: memcpy.S,v 1.1 2002/11/20 14:23:54 itohy Exp $ */
/* memcpy() is assembled from bcopy.S; MEMCOPY selects the memcpy entry point. */
#define MEMCOPY
#include "bcopy.S"

View File

@ -0,0 +1,4 @@
/* $NetBSD: memmove.S,v 1.1 2002/11/20 14:23:54 itohy Exp $ */
/* memmove() is assembled from bcopy.S; MEMMOVE selects the memmove entry point. */
#define MEMMOVE
#include "bcopy.S"

View File

@ -0,0 +1,298 @@
/* $NetBSD: memset.S,v 1.1 2002/11/20 14:23:54 itohy Exp $ */
/*-
* Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memset.S,v 1.1 2002/11/20 14:23:54 itohy Exp $")
#endif
/*
 * memset(dst, c, len), or bzero(dst, len) when the including file
 * defines BZERO.
 *
 * Register use:
 *   REG_DST   destination pointer (r4)
 *   REG_C     fill value (r5 for memset; r2, loaded with 0, for bzero)
 *   REG_LEN   byte count (r6 for memset, r5 for bzero)
 *   REG_DST0  copy of the original dst, returned in r0 (r3; memset only)
 *   REG_PTR   r0 and REG_TMP1 r1: scratch
 *
 * The strategy is chosen by length:
 *   len >= 28        "large": 32-bit stores once both ends are aligned
 *   12 <= len < 28   "small": computed jump into a table of stores
 *   len < 12         the byte loop in this section
 *
 * The instruction that follows bt/s, bf/s, jmp, bra and rts is a delay
 * slot: it is executed before control reaches the branch target.
 */
#define REG_PTR r0
#define REG_TMP1 r1
#ifdef BZERO
# define REG_C r2
# define REG_DST r4
# define REG_LEN r5
#else
# define REG_DST0 r3
# define REG_DST r4
# define REG_C r5
# define REG_LEN r6
#endif
#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
mov REG_DST,REG_DST0 /* for return value */
#endif
/* small amount to fill ? */
mov #28,REG_TMP1
cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
bt/s large
mov #12,REG_TMP1 /* (delay slot) */
cmp/hs REG_TMP1,REG_LEN /* if (len >= 12) goto small; */
bt/s small
/* delay slot of the bt/s: the mov for bzero, otherwise the tst below */
#ifdef BZERO
mov #0,REG_C
#endif
/* very little fill (0 ~ 11 bytes) */
tst REG_LEN,REG_LEN /* T = ( len == 0 ) */
add REG_DST,REG_LEN /* REG_LEN becomes the end pointer dst + len */
bt/s done /* if (len == 0) return; */
add #1,REG_DST /* (delay slot) REG_DST = dst + 1 marks the last store */
/* unroll 4 loops */
/*
 * Bytes are stored from the end downward.  Before each store T is set
 * to ( end == dst + 1 ), i.e. this store writes the first byte of the
 * buffer, and the bt/s after the store then leaves the loop.
 */
cmp/eq REG_DST,REG_LEN
1: mov.b REG_C,@-REG_LEN
bt/s done
cmp/eq REG_DST,REG_LEN /* (delay slot) test for the next store */
mov.b REG_C,@-REG_LEN
bt/s done
cmp/eq REG_DST,REG_LEN
mov.b REG_C,@-REG_LEN
bt/s done
cmp/eq REG_DST,REG_LEN
mov.b REG_C,@-REG_LEN
bf/s 1b
cmp/eq REG_DST,REG_LEN
done:
/* memset returns the original dst in r0; bzero sets no result */
#ifdef BZERO
rts
nop
#else
rts
mov REG_DST0,r0
#endif
/*
 * Fill of 12 to 27 bytes (see the length dispatch at the entry point).
 *
 * Odd dst: jump into a table of byte stores ordered from offset 31 down
 * to offset 0.  Each store occupies 2 bytes of code, so entering the
 * table 2 * len bytes before local label 1 executes exactly the stores
 * for offsets len-1 down to 0.
 * Even dst: handled at small_aligned with 16-bit stores.
 *
 * In both tables the fill value is moved into r0 in the delay slot of
 * the jmp; the jump target is taken from r0 before that move happens.
 */
small:
mov REG_DST,r0
tst #1,r0
bt/s small_aligned /* dst is even: use 16-bit stores */
mov REG_DST,REG_TMP1 /* (delay slot) */
shll REG_LEN /* len * 2 = bytes of code to execute */
mova 1f,r0 /* 1f must be 4bytes aligned! */
add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
sub REG_LEN,r0 /* r0 = address of the store for offset len-1 */
jmp @r0
mov REG_C,r0 /* (delay slot) r0 = fill value */
.align 2
/* offsets 31..16, addressed relative to REG_TMP1 = dst + 16 */
mov.b r0,@(15,REG_TMP1)
mov.b r0,@(14,REG_TMP1)
mov.b r0,@(13,REG_TMP1)
mov.b r0,@(12,REG_TMP1)
mov.b r0,@(11,REG_TMP1)
mov.b r0,@(10,REG_TMP1)
mov.b r0,@(9,REG_TMP1)
mov.b r0,@(8,REG_TMP1)
mov.b r0,@(7,REG_TMP1)
mov.b r0,@(6,REG_TMP1)
mov.b r0,@(5,REG_TMP1)
mov.b r0,@(4,REG_TMP1)
mov.b r0,@(3,REG_TMP1)
mov.b r0,@(2,REG_TMP1)
mov.b r0,@(1,REG_TMP1)
mov.b r0,@REG_TMP1
/* offsets 15..0, addressed relative to REG_DST */
mov.b r0,@(15,REG_DST)
mov.b r0,@(14,REG_DST)
mov.b r0,@(13,REG_DST)
mov.b r0,@(12,REG_DST)
mov.b r0,@(11,REG_DST)
mov.b r0,@(10,REG_DST)
mov.b r0,@(9,REG_DST)
mov.b r0,@(8,REG_DST)
mov.b r0,@(7,REG_DST)
mov.b r0,@(6,REG_DST)
mov.b r0,@(5,REG_DST)
mov.b r0,@(4,REG_DST)
mov.b r0,@(3,REG_DST)
mov.b r0,@(2,REG_DST)
mov.b r0,@(1,REG_DST)
/*
 * bzero: the store for offset 0 sits in the delay slot of rts, so
 * label 1 is placed on it.  memset: the delay slot is needed for the
 * return value, so the store comes first and label 1 is on rts.
 * Either way the table is 64 bytes long up to label 1.
 */
#ifdef BZERO
rts
1: mov.b r0,@REG_DST
#else
mov.b r0,@REG_DST
1: rts
mov REG_DST0,r0
#endif
/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
/* replicate the fill byte into the low 16 bits of REG_C */
extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
or REG_TMP1,REG_C /* REG_C = ????xxxx */
#endif
mov REG_LEN,r0
tst #1,r0 /* len is aligned? */
bt/s 1f
add #-1,r0 /* (delay slot) r0 = len - 1 */
mov.b REG_C,@(r0,REG_DST) /* odd len: fill the last byte, dst[len-1] */
mov r0,REG_LEN /* len is now even */
1:
/*
 * Each 16-bit store is 2 bytes of code and fills 2 bytes, so the entry
 * point is exactly len bytes before local label 1.
 */
mova 1f,r0 /* 1f must be 4bytes aligned! */
sub REG_LEN,r0
jmp @r0
mov REG_C,r0 /* (delay slot) r0 = fill value */
.align 2
mov.w r0,@(30,REG_DST)
mov.w r0,@(28,REG_DST)
mov.w r0,@(26,REG_DST)
mov.w r0,@(24,REG_DST)
mov.w r0,@(22,REG_DST)
mov.w r0,@(20,REG_DST)
mov.w r0,@(18,REG_DST)
mov.w r0,@(16,REG_DST)
mov.w r0,@(14,REG_DST)
mov.w r0,@(12,REG_DST)
mov.w r0,@(10,REG_DST)
mov.w r0,@(8,REG_DST)
mov.w r0,@(6,REG_DST)
mov.w r0,@(4,REG_DST)
mov.w r0,@(2,REG_DST)
/* same label placement as in the byte table; 32 bytes up to label 1 */
#ifdef BZERO
rts
1: mov.w r0,@REG_DST
#else
mov.w r0,@REG_DST
1: rts
mov REG_DST0,r0
#endif
/*
 * Fill of 28 bytes or more (see the length dispatch at the entry point).
 *
 * REG_C is widened to a 32-bit pattern, REG_PTR (r0) is set to the end
 * of the buffer, and the unaligned head and tail are filled with byte
 * and 16-bit stores until both REG_DST and REG_PTR are 4-byte aligned.
 * The aligned middle is then filled from the end downward: first in
 * 32-byte steps, then one longword at a time.
 *
 * The instruction that follows bt/s, bf/s, bra and rts is a delay slot:
 * it is executed before control reaches the branch target.
 */
.align 2
large:
#ifdef BZERO
mov #0,REG_C
#else
/* replicate the fill byte into all four bytes of REG_C */
extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
#endif
mov #3,REG_TMP1
tst REG_TMP1,REG_DST /* T = ( (dst & 3) == 0 ) */
mov REG_DST,REG_PTR
bf/s unaligned_dst
add REG_LEN,REG_PTR /* REG_PTR = dst + len; */
tst REG_TMP1,REG_LEN /* dst is aligned: is len a multiple of 4? */
bf/s unaligned_len
aligned:
/* the mov below is also the delay slot of the bf/s above */
/* fill 32*n bytes */
mov #32,REG_TMP1
cmp/hi REG_LEN,REG_TMP1 /* T = ( len < 32 ) */
bt 9f
.align 2
1: sub REG_TMP1,REG_PTR /* ptr -= 32; */
mov.l REG_C,@REG_PTR
sub REG_TMP1,REG_LEN /* len -= 32; */
mov.l REG_C,@(4,REG_PTR)
cmp/hi REG_LEN,REG_TMP1 /* T = ( len < 32 ), used by the bf/s below */
mov.l REG_C,@(8,REG_PTR)
mov.l REG_C,@(12,REG_PTR)
mov.l REG_C,@(16,REG_PTR)
mov.l REG_C,@(20,REG_PTR)
mov.l REG_C,@(24,REG_PTR)
bf/s 1b /* loop while len >= 32 */
mov.l REG_C,@(28,REG_PTR) /* (delay slot) last store of the 32 bytes */
9:
/* fill left 4*n bytes */
cmp/eq REG_DST,REG_PTR /* nothing left? */
bt 9f
add #4,REG_DST /* REG_DST = dst + 4 marks the last store */
/*
 * Before each store T is set to ( ptr == dst + 4 ), i.e. this store
 * writes the first longword; the bt/s after the store then leaves.
 */
cmp/eq REG_DST,REG_PTR
1: mov.l REG_C,@-REG_PTR
bt/s 9f
cmp/eq REG_DST,REG_PTR /* (delay slot) test for the next store */
mov.l REG_C,@-REG_PTR
bt/s 9f
cmp/eq REG_DST,REG_PTR
mov.l REG_C,@-REG_PTR
bt/s 9f
cmp/eq REG_DST,REG_PTR
mov.l REG_C,@-REG_PTR
bf/s 1b
cmp/eq REG_DST,REG_PTR
9:
/* memset returns the original dst in r0; bzero sets no result */
#ifdef BZERO
rts
nop
#else
rts
mov REG_DST0,r0
#endif
unaligned_dst:
/* advance REG_DST to a 4-byte boundary, filling the bytes skipped */
mov #1,REG_TMP1
tst REG_TMP1,REG_DST /* if (dst & 1) { */
add #1,REG_TMP1 /* REG_TMP1 = 2 */
bt/s 2f
tst REG_TMP1,REG_DST /* (delay slot) T = ( (dst & 2) == 0 ) */
mov.b REG_C,@REG_DST /* *dst++ = c; */
add #1,REG_DST
tst REG_TMP1,REG_DST /* test bit 1 again on the advanced dst */
2: /* } */
/* if (dst & 2) { */
bt 4f
mov.w REG_C,@REG_DST /* *(u_int16_t*)dst++ = c; */
add #2,REG_DST
4: /* } */
tst #3,REG_PTR /* if (ptr & 3) { */
bt/s 4f /* */
unaligned_len:
/*
 * Pull REG_PTR (the end pointer) back to a 4-byte boundary, filling
 * the bytes skipped.  The tst below is also the delay slot of the
 * bt/s above.
 */
tst #1,REG_PTR /* if (ptr & 1) { */
bt/s 2f
tst #2,REG_PTR /* (delay slot) bit 1, tested before the decrement */
mov.b REG_C,@-REG_PTR /* *--ptr = c; */
2: /* } */
/* if (ptr & 2) { */
bt 4f
mov.w REG_C,@-REG_PTR /* *--(u_int16_t*)ptr = c; */
4: /* } */
/* } */
mov REG_PTR,REG_LEN
bra aligned
sub REG_DST,REG_LEN /* (delay slot) len = ptr - dst, now a multiple of 4 */