Replaced strlen with the glibc optimized versions for i586 and powerpc32.

If the build fails, simply delete the object file posix_build.o manually.


git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16819 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
Jérôme Duval 2006-03-16 16:37:05 +00:00
parent 7ca5966a15
commit 3b74cca68e
6 changed files with 349 additions and 21 deletions

View File

@ -118,6 +118,7 @@ MergeObject posix_gnu_arch_$(TARGET_ARCH)_other.o :
ldbl2mpn.c
lshift.S rshift.S
mul_1.S
strlen.S
sub_n.S
submul_1.S

View File

@ -0,0 +1,159 @@
/* Optimized strlen implementation for PowerPC.
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <bp-sym.h>
#include <bp-asm.h>
/* The algorithm here uses the following techniques:
1) Given a word 'x', we can test to see if it contains any 0 bytes
by subtracting 0x01010101, and seeing if any of the high bits of each
byte changed from 0 to 1. This works because the least significant
0 byte must have had no incoming carry (otherwise it's not the least
significant), so it is 0x00 - 0x01 == 0xff. For all other
byte values, either they have the high bit set initially, or when
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
there were no 0x00 bytes in the word.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
This produces 0x80 in each byte that was zero, and 0x00 in all
the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
byte, and the '| x' part ensures that bytes with the high bit set
produce 0x00. The addition will carry into the high bit of each byte
iff that byte had one of its low 7 bits set. We can then just see
which was the most significant bit set and divide by 8 to find how
many to add to the index.
This is from the book 'The PowerPC Compiler Writer's Guide',
by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
We deal with strings not aligned to a word boundary by taking the
first word and ensuring that bytes not part of the string
are treated as nonzero. To allow for memory latency, we unroll the
loop a few times, being careful to ensure that we do not read ahead
across cache line boundaries.
Questions to answer:
1) How long are strings passed to strlen? If they're often really long,
we should probably use cache management instructions and/or unroll the
loop more. If they're often quite short, it might be better to use
fact (2) in the inner loop than have to recalculate it.
2) How popular are bytes with the high bit set? If they are very rare,
on some processors it might be useful to use the simpler expression
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
ALU), but this fails when any character has its high bit set. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
saving them. We can also use registers 14 through 31 if we save them.
We can't use r1 (it's the stack pointer), r2 nor r13 because the user
program may expect them to hold their usual value if we get sent
a signal. Integer parameters are passed in r3 through r10.
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
them, the others we must save. */
/* int [r3] strlen (char *s [r3])
   Returns in r3 the number of bytes that precede the first NUL byte of the
   string whose address is passed in r3.
   Outline:
   - round the pointer down to a word boundary and test that first word with
     method (2), masking off the bytes that precede the string;
   - if that word was the first half of an aligned doubleword, test the
     second half the same way, so that the main loop always starts on a
     doubleword boundary;
   - the main loop loads two words per iteration and tests each with
     method (1); on a hit, the method (2) value is computed for the word
     that hit, in order to locate the byte.
   The byte-position arithmetic (srw on the mask, cntlzw on the result)
   treats the most significant byte of a word as the one at the lowest
   address.
   Registers written by the instructions visible here: r0, r3-r12, cr0 and
   cr7.  */
ENTRY (BP_SYM (strlen))
#define rTMP1 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
string to make it start at a word boundary */
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
#define r7F7F r7 /* constant 0x7f7f7f7f */
#define rWORD1 r8 /* current string word */
#define rWORD2 r9 /* next string word */
/* rMASK shares r9 with rWORD2: the mask is only used on the first word,
   which is tested before rWORD2 is loaded for the first time in the loop.  */
#define rMASK r9 /* mask for first string word */
#define rTMP2 r10
#define rTMP3 r11
#define rTMP4 r12
/* NOTE(review): CHECK_BOUNDS_LOW is not defined in this file (presumably it
   comes from <bp-asm.h>); what it expands to cannot be told from here.  */
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
/* rSTR  = s with its low two bits cleared (word aligned).
   rPADN = 8 * (s & 3): the number of bits of the first word that lie
           before the start of the string.
   r7F7F = 0x7f7f7f7f, built in two halves (lis, then addi).
   rWORD1 = the aligned word that contains s[0].
   rMASK starts out as 0xffffffff.  */
clrrwi rSTR, rRTN, 2
lis r7F7F, 0x7f7f
rlwinm rPADN, rRTN, 3, 27, 28
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
/* That's the setup done, now do the first pair of words.
We make an exception and use method (2) on the first two words, to reduce
overhead. */
/* Shifting the all-ones mask right by rPADN clears the mask bits that
   correspond to the bytes before s, so a zero byte there is ignored.  */
srw rMASK, rMASK, rPADN
/* rTMP1 = ~((x | 0x7f7f7f7f) | ((x & 0x7f7f7f7f) + 0x7f7f7f7f)), i.e. the
   method (2) value for x = rWORD1.  */
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
nor rTMP1, rTMP2, rTMP1
/* Record form: cr0 tells whether any marker survives the mask.  */
and. rWORD1, rTMP1, rMASK
/* Copy the low four bits of the original pointer into cr7; CR bit 29 then
   holds the pointer bit of value 4.  Only cr7 is written, so the cr0 result
   of the and. above is still valid for the bne below.  */
mtcrf 0x01, rRTN
bne L(done0)
/* rFEFE = 0xfefefeff, built as (-0x101 << 16) + (-0x101).  */
lis rFEFE, -0x101
addi rFEFE, rFEFE, -0x101
/* Are we now aligned to a doubleword boundary? */
/* CR bit 29 set means the word just tested was the second word of an
   aligned doubleword, so the next word starts a new doubleword.  */
bt 29, L(loop)
/* Handle second word of pair. */
/* lwzu also advances rSTR by 4.  No mask is needed here: every byte of this
   word comes after the start of the string.  */
lwzu rWORD1, 4(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
nor. rWORD1, rTMP2, rTMP1
bne L(done0)
/* The loop. */
/* Each iteration reads the words at rSTR+4 and rSTR+8 and advances rSTR by
   8 (the update happens in lwzu), so afterwards rSTR addresses rWORD2 and
   rSTR-4 addresses rWORD1.  Each word is tested with method (1):
   (x + 0xfefefeff) & ~(x | 0x7f7f7f7f).
   The add/nor for rWORD2 sit between the and. and the bne for rWORD1; they
   are not record forms, so they leave the cr0 result of the first test
   intact.  rTMP2 and rTMP4 keep ~(x | 0x7f7f7f7f) for reuse after the loop.  */
L(loop):
lwz rWORD1, 4(rSTR)
lwzu rWORD2, 8(rSTR)
add rTMP1, rFEFE, rWORD1
nor rTMP2, r7F7F, rWORD1
and. rTMP1, rTMP1, rTMP2
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
and. rTMP1, rTMP3, rTMP4
beq L(loop)
/* The hit was in rWORD2, which rSTR already addresses.  Compute the
   method (2) value for it: rTMP4 & ~((x & 0x7f7f7f7f) + 0x7f7f7f7f).  */
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP4, rTMP1
b L(done0)
/* The hit was in rWORD1: step rSTR back by 4 so that it addresses that
   word, and compute the method (2) value for it from rTMP2.  */
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
add rTMP1, rTMP1, r7F7F
andc rWORD1, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
contains a zero byte, and the most significant set bit in rWORD1 is in that
byte. */
/* cntlzw yields the number of zero bits above that marker; shifting right
   by 3 turns it into the byte offset inside the word.  The result is
   (rSTR - s) + offset.  When the hit is in the very first word of an
   unaligned string, rSTR - s is negative and the offset makes up for it.  */
L(done0):
cntlzw rTMP3, rWORD1
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
/* GKM FIXME: check high bound. */
blr
END (BP_SYM (strlen))

View File

@ -101,6 +101,7 @@ MergeObject posix_gnu_arch_$(TARGET_ARCH)_other.o :
mplog.c
mul_1.S
lshift.S rshift.S
strlen.S
sub_n.S
submul_1.S
;

View File

@ -0,0 +1,188 @@
/* strlen -- Compute length of NUL terminated string.
Highly optimized version for ix86, x>=5.
Copyright (C) 1995, 1996, 1997, 2000, 2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"
/* This version is especially optimized for the i586 (and following?)
processors. This is mainly done by using the two pipelines. The
version optimized for i486 is weak in this aspect because to get
as much parallelism we have to execute some *more* instructions.
The code below is structured to reflect the pairing of the instructions
as *I think* it is. I have no processor data book to verify this.
If you find something you think is incorrect let me know. */
/* The magic value which is used throughout the code.  The word loop adds it
   to every word it reads and then looks at the carry flag and at bits 8, 16
   and 24 (the bits that are set in ~magic).  */
#define magic 0xfefefeff
/* NOTE(review): LINKAGE is not defined in this file -- presumably it comes
   from <sysdep.h>; confirm before relying on its value.  */
#define PARMS LINKAGE /* no space for saved regs */
/* Displacement from %esp at which the string pointer argument is read
   (used below as STR(%esp)).  */
#define STR PARMS
.text
/* strlen: reads the string pointer from STR(%esp) and returns in %eax the
   distance from that pointer to the first NUL byte.
   Outline:
   - test up to three single bytes until %eax is a multiple of 4;
   - L(1): test one 32-bit word at a time (the test is written out four
     times per loop iteration);
   - L(3): find which byte of the word that hit is the NUL;
   - L(2): subtract the original pointer from the address of the NUL.
   Registers written by the instructions visible here: %eax, %ecx, %edx and
   the flags.
   NOTE(review): ENTER, LEAVE, CHECK_BOUNDS_LOW and CHECK_BOUNDS_HIGH are
   macros that are not defined in this file; what they expand to cannot be
   told from here.  */
ENTRY (BP_SYM (strlen))
ENTER
movl STR(%esp), %eax
CHECK_BOUNDS_LOW (%eax, STR(%esp))
movl $3, %edx /* load mask (= 3) */
andl %eax, %edx /* separate last two bits of address */
/* %edx is now 0..3, so %dh is 0 and can serve as the NUL to compare with.
   The andl also set ZF and PF, which the next two jumps consume.  */
jz L(1) /* aligned => start loop */
/* PF is set when the low byte of the result has an even number of 1 bits;
   the result is non-zero here, so that means it is 3 and only one byte is
   left before the next word boundary.  */
jp L(0) /* exactly two bits set */
/* Remainder is 1 or 2: test two bytes.  */
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
/* A remainder of 2 becomes 0 here: %eax is aligned and %edx is already 0 as
   L(1) requires.  A remainder of 1 becomes 3 and falls through to test a
   third byte (%dh is still 0).  */
xorl $2, %edx
jz L(1)
L(0): cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
xorl %edx, %edx /* We need %edx == 0 for later */
/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
change any of the hole bits of LONGWORD.
1) Is this safe? Will it catch all the zero bytes?
Suppose there is a byte with all zeros. Any carry bits
propagating from its left will fall into the hole at its
least significant bit and stop. Since there will be no
carry from its most significant bit, the LSB of the
byte to the left will be unchanged, and the zero will be
detected.
2) Is this worthwhile? Will it ignore everything except
zero bytes? Suppose every byte of LONGWORD has a bit set
somewhere. There will be a carry into bit 8. If bit 8
is set, this will carry into bit 16. If bit 8 is clear,
one of bits 9-15 must be set, so there will be a carry
into bit 16. Similarly, there will be a carry into bit
24. If one of bits 24-31 is set, there will be a carry
into bit 32 (=carry flag), so all of the hole bits will
be changed.
Note: %edx == 0 in any case here. */
/* One test step, repeated four times below:
   - %edx is 0 on entry, so subl followed by decl leaves %edx = ~word;
   - decl does not modify CF, so jnc still tests the carry out of the
     preceding addl;
   - (word+magic) ^ ~word has a 1 in every bit position that the addition
     left unchanged; the andl keeps only bits 8, 16 and 24;
   - when nothing is found the andl leaves %edx == 0, which is exactly the
     precondition of the next step.  */
L(1):
movl (%eax), %ecx /* get word (= 4 bytes) in question */
addl $4, %eax /* adjust pointer for *next* word */
subl %ecx, %edx /* first step to negate word */
addl $magic, %ecx /* add magic word */
decl %edx /* complete negation of word */
jnc L(3) /* previous addl caused overflow? */
xorl %ecx, %edx /* (word+magic)^~word */
andl $~magic, %edx /* any of the carry flags set? */
jne L(3) /* yes => determine byte */
movl (%eax), %ecx /* get word (= 4 bytes) in question */
addl $4, %eax /* adjust pointer for *next* word */
subl %ecx, %edx /* first step to negate word */
addl $magic, %ecx /* add magic word */
decl %edx /* complete negation of word */
jnc L(3) /* previous addl caused overflow? */
xorl %ecx, %edx /* (word+magic)^~word */
andl $~magic, %edx /* any of the carry flags set? */
jne L(3) /* yes => determine byte */
movl (%eax), %ecx /* get word (= 4 bytes) in question */
addl $4, %eax /* adjust pointer for *next* word */
subl %ecx, %edx /* first step to negate word */
addl $magic, %ecx /* add magic word */
decl %edx /* complete negation of word */
jnc L(3) /* previous addl caused overflow? */
xorl %ecx, %edx /* (word+magic)^~word */
andl $~magic, %edx /* any of the carry flags set? */
jne L(3) /* yes => determine byte */
movl (%eax), %ecx /* get word (= 4 bytes) in question */
addl $4, %eax /* adjust pointer for *next* word */
subl %ecx, %edx /* first step to negate word */
addl $magic, %ecx /* add magic word */
decl %edx /* complete negation of word */
jnc L(3) /* previous addl caused overflow? */
xorl %ecx, %edx /* (word+magic)^~word */
andl $~magic, %edx /* any of the carry flags set? */
je L(1) /* no => start loop again */
/* The word that hit is at %eax-4 and %ecx holds word+magic.  Undo both
   adjustments, then test the bytes in address order: %cl, %ch, and %cl
   again after shifting the upper half down.  If none of the first three is
   NUL, control falls into L(2) with %eax addressing the fourth byte, which
   is not tested separately.  */
L(3): subl $4, %eax /* correct too early pointer increment */
subl $magic, %ecx
cmpb $0, %cl /* lowest byte NUL? */
jz L(2) /* yes => return */
inc %eax /* increment pointer */
testb %ch, %ch /* second byte NUL? */
jz L(2) /* yes => return */
shrl $16, %ecx /* make upper bytes accessible */
incl %eax /* increment pointer */
cmpb $0, %cl /* is third byte NUL? */
jz L(2) /* yes => return */
incl %eax /* increment pointer */
/* On every path into L(2), %eax is the address of the NUL byte.  */
L(2): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
subl STR(%esp), %eax /* now compute the length as difference
between start and terminating NUL
character */
LEAVE
ret
END (BP_SYM (strlen))

View File

@ -25,7 +25,6 @@ MergeObject posix_string.o :
strerror.c
strlcat.c
strlcpy.c
strlen.c
strncat.c
strncmp.c
strncpy.c

View File

@ -1,20 +0,0 @@
/*
** Copyright 2001, Travis Geiselbrecht. All rights reserved.
** Distributed under the terms of the NewOS License.
*/
#include <sys/types.h>
#include <string.h>
/*
** Returns the number of characters that precede the terminating NUL
** character of the string s. The NUL itself is not counted.
*/
size_t
strlen(char const *s)
{
	char const *cursor = s;

	/* walk forward until the cursor rests on the terminator */
	for (; *cursor != '\0'; cursor++)
		;

	return (size_t)(cursor - s);
}