From 8231ab74c5927d32fca5870b661ac5d14c3b9430 Mon Sep 17 00:00:00 2001 From: mjl Date: Fri, 30 Nov 2001 02:27:20 +0000 Subject: [PATCH] Add assembler version of strlen. --- sys/lib/libkern/arch/powerpc/Makefile.inc | 6 +- sys/lib/libkern/arch/powerpc/strlen.S | 112 ++++++++++++++++++++++ 2 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 sys/lib/libkern/arch/powerpc/strlen.S diff --git a/sys/lib/libkern/arch/powerpc/Makefile.inc b/sys/lib/libkern/arch/powerpc/Makefile.inc index 9fb9291ef33a..b9030695c9b8 100644 --- a/sys/lib/libkern/arch/powerpc/Makefile.inc +++ b/sys/lib/libkern/arch/powerpc/Makefile.inc @@ -1,15 +1,15 @@ -# $NetBSD: Makefile.inc,v 1.17 2001/11/29 00:27:07 mjl Exp $ +# $NetBSD: Makefile.inc,v 1.18 2001/11/30 02:27:20 mjl Exp $ SRCS+= __main.c __assert.c \ imax.c imin.c lmax.c lmin.c max.c min.c ulmax.c ulmin.c \ bswap16.c bswap32.c bswap64.c \ bcmp.c \ memchr.c memcmp.c \ - strcat.c strcmp.c strcpy.c strlen.c strcasecmp.c \ + strcat.c strcmp.c strcpy.c strcasecmp.c \ strncasecmp.c strncmp.c strncpy.c \ scanc.c skpc.c \ htonl.c htons.c ntohl.c ntohs.c \ random.c strtoul.c \ syncicache.c -SRCS+= ffs.S bzero.S +SRCS+= ffs.S bzero.S strlen.S diff --git a/sys/lib/libkern/arch/powerpc/strlen.S b/sys/lib/libkern/arch/powerpc/strlen.S new file mode 100644 index 000000000000..4276e68b64ec --- /dev/null +++ b/sys/lib/libkern/arch/powerpc/strlen.S @@ -0,0 +1,112 @@ +/* $NetBSD: strlen.S,v 1.1 2001/11/30 02:27:20 mjl Exp $ */ + +/*- + * Copyright (C) 2001 Martin J. Laubach + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/*----------------------------------------------------------------------*/ + +#include <machine/asm.h> + +/*----------------------------------------------------------------------*/ +/* The algorithm here uses the following techniques: + + 1) Given a word 'x', we can test to see if it contains any 0 bytes + by subtracting 0x01010101, and seeing if any of the high bits of each + byte changed from 0 to 1. This works because the least significant + 0 byte must have had no incoming carry (otherwise it's not the least + significant), so it is 0x00 - 0x01 == 0xff. For all other + byte values, either they have the high bit set initially, or when + 1 is subtracted you get a value in the range 0x00-0x7f, none of which + have their high bit set. The expression here is + (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when + there were no 0x00 bytes in the word. 
+
+   2) Given a word 'x', we can test to see _which_ byte was zero by
+      calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
+      This produces 0x80 in each byte that was zero, and 0x00 in all
+      the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
+      byte, and the '| x' part ensures that bytes with the high bit set
+      produce 0x00. The addition will carry into the high bit of each byte
+      iff that byte had one of its low 7 bits set. We can then just see
+      which was the most significant bit set and divide by 8 to find how
+      many to add to the index.
+      This is from the book 'The PowerPC Compiler Writer's Guide',
+      by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+*/
+/*----------------------------------------------------------------------*/
+
+	.text
+	.align 4
+
+/*
+ * size_t strlen(const char *s)
+ *
+ * In:       r3 = s
+ * Out:      r3 = number of bytes before the first NUL byte
+ *           r4 = address of that NUL byte (see the note near the end)
+ * Clobbers: r0, r5-r10, cr0
+ *
+ * The string is examined one naturally aligned 32-bit word at a time,
+ * so the first load may also fetch up to three bytes that precede s;
+ * those bytes are forced non-zero before the word is tested.  The
+ * leading-byte mask and the cntlzw based byte index both treat the
+ * lowest-addressed byte as the most significant byte of the word.
+ */
+ENTRY(strlen)
+
+	/* r10 = 0x7f7f7f7f and r9 = 0xfefefeff, used by both steps */
+	lis	r10, 0x7f7f
+	lis	r9, 0xfefe
+	ori	r10, r10, 0x7f7f
+	ori	r9, r9, 0xfeff
+
+	/* Work out how many bytes of the first word lie before s */
+	rlwinm.	r8, r3, 3, 27, 28	/* r8 = (s & 3) * 8, cr0 = (r8 ? 0) */
+	clrrwi	r5, r3, 2		/* r5 = s rounded down to a word */
+	li	r0, -1			/* r0 = 0xffffffff */
+	beq+	.Laligned		/* s & 3 == 0: nothing to hide */
+
+	srw	r0, r0, r8		/* ones from the byte at s onwards */
+
+	lwz	r7, 0(r5)		/* word that holds the first byte */
+	not	r0, r0			/* ones over the bytes before s */
+	or	r7, r7, r0		/* those can no longer test as NUL */
+	b	.Lscan_word
+
+.Laligned:
+	addi	r5, r5, -4		/* cancel the first lwzu increment */
+
+.Lnext_word:
+	lwzu	r7, 4(r5)		/* r5 += 4, r7 = word at r5 */
+
+.Lscan_word:
+	nor	r0, r7, r10		/* step 1: ~(x | 0x7f7f7f7f) */
+	add	r6, r7, r9		/*         x + 0xfefefeff */
+	and.	r0, r0, r6		/*         zero iff no NUL in x */
+
+	beq+	.Lnext_word		/* keep going, nothing found */
+
+	and	r8, r7, r10		/* step 2: x & 0x7f7f7f7f */
+	or	r7, r7, r10		/*         x | 0x7f7f7f7f */
+	add	r0, r8, r10		/*         (x & 0x7f..) + 0x7f.. */
+	nor	r8, r7, r0		/* r8 has 0x80 in every NUL byte */
+
+	cntlzw	r0, r8			/* bit position of the first 0x80 */
+	srwi	r4, r0, 3		/* bits -> byte index in the word */
+
+	add	r4, r5, r4		/* r4 = address of the NUL byte */
+	/* NOTE: Keep returning the end pointer in r4, so that other
+	   str* routines (strcat comes to mind) can use it. */
+
+	sub	r3, r4, r3		/* length = end pointer - s */
+	blr
+
+/*----------------------------------------------------------------------*/