Add assembler version of strlen.
This commit is contained in:
parent
a9c86c6ce0
commit
8231ab74c5
@ -1,15 +1,15 @@
|
||||
# $NetBSD: Makefile.inc,v 1.17 2001/11/29 00:27:07 mjl Exp $
|
||||
# $NetBSD: Makefile.inc,v 1.18 2001/11/30 02:27:20 mjl Exp $
|
||||
|
||||
SRCS+= __main.c __assert.c \
|
||||
imax.c imin.c lmax.c lmin.c max.c min.c ulmax.c ulmin.c \
|
||||
bswap16.c bswap32.c bswap64.c \
|
||||
bcmp.c \
|
||||
memchr.c memcmp.c \
|
||||
strcat.c strcmp.c strcpy.c strlen.c strcasecmp.c \
|
||||
strcat.c strcmp.c strcpy.c strcasecmp.c \
|
||||
strncasecmp.c strncmp.c strncpy.c \
|
||||
scanc.c skpc.c \
|
||||
htonl.c htons.c ntohl.c ntohs.c \
|
||||
random.c strtoul.c \
|
||||
syncicache.c
|
||||
|
||||
SRCS+= ffs.S bzero.S
|
||||
SRCS+= ffs.S bzero.S strlen.S
|
||||
|
112
sys/lib/libkern/arch/powerpc/strlen.S
Normal file
112
sys/lib/libkern/arch/powerpc/strlen.S
Normal file
@ -0,0 +1,112 @@
|
||||
/* $NetBSD: strlen.S,v 1.1 2001/11/30 02:27:20 mjl Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (C) 2001 Martin J. Laubach <mjl@netbsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
||||
/* The algorithm here uses the following techniques:
|
||||
|
||||
1) Given a word 'x', we can test to see if it contains any 0 bytes
|
||||
by subtracting 0x01010101, and seeing if any of the high bits of each
|
||||
byte changed from 0 to 1. This works because the least significant
|
||||
0 byte must have had no incoming carry (otherwise it's not the least
|
||||
significant), so it is 0x00 - 0x01 == 0xff. For all other
|
||||
byte values, either they have the high bit set initially, or when
|
||||
1 is subtracted you get a value in the range 0x00-0x7f, none of which
|
||||
have their high bit set. The expression here is
|
||||
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
|
||||
there were no 0x00 bytes in the word.
|
||||
|
||||
2) Given a word 'x', we can test to see _which_ byte was zero by
|
||||
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
|
||||
This produces 0x80 in each byte that was zero, and 0x00 in all
|
||||
the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
|
||||
byte, and the '| x' part ensures that bytes with the high bit set
|
||||
produce 0x00. The addition will carry into the high bit of each byte
|
||||
iff that byte had one of its low 7 bits set. We can then just see
|
||||
which was the most significant bit set and divide by 8 to find how
|
||||
many to add to the index.
|
||||
This is from the book 'The PowerPC Compiler Writer's Guide',
|
||||
by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
|
||||
*/
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
.text
|
||||
.align 4
|
||||
|
||||
ENTRY(strlen)
|
||||
|
||||
/* Setup constants */
|
||||
lis r10, 0x7f7f
|
||||
lis r9, 0xfefe
|
||||
ori r10, r10, 0x7f7f
|
||||
ori r9, r9, 0xfeff
|
||||
|
||||
/* Mask out leading bytes on non aligned strings */
|
||||
rlwinm. r8, r3, 3, 27, 28 /* leading bits to mask */
|
||||
clrrwi r5, r3, 2 /* clear low 2 addr bits */
|
||||
li r0, -1
|
||||
beq+ 3f /* skip alignment if already */
|
||||
/* aligned */
|
||||
|
||||
srw r0, r0, r8 /* make 0000...1111 mask */
|
||||
|
||||
lwz r7, 0(r5)
|
||||
nor r0, r0, r0 /* invert mask */
|
||||
or r7, r7, r0 /* make leading bytes != 0 */
|
||||
b 2f
|
||||
|
||||
3: subi r5, r5, 4
|
||||
|
||||
1: lwzu r7, 4(r5) /* fetch data word */
|
||||
|
||||
2: nor r0, r7, r10 /* do step 1 */
|
||||
add r6, r7, r9
|
||||
and. r0, r0, r6
|
||||
|
||||
beq+ 1b /* no NUL bytes here */
|
||||
|
||||
and r8, r7, r10 /* ok, a NUL is somewhere */
|
||||
or r7, r7, r10 /* do step 2 to find out */
|
||||
add r0, r8, r10 /* where */
|
||||
nor r8, r7, r0
|
||||
|
||||
cntlzw r0, r8 /* offset from this word */
|
||||
srwi r4, r0, 3
|
||||
|
||||
add r4, r5, r4 /* r4 contains end pointer */
|
||||
/* NOTE: Keep it so this function returns the end pointer
|
||||
in r4, so we can it use from other str* calls (strcat
|
||||
comes to mind */
|
||||
|
||||
subf r3, r3, r4
|
||||
blr
|
||||
|
||||
/*----------------------------------------------------------------------*/
|
Loading…
Reference in New Issue
Block a user