Replaced strlen with the glibc-optimized versions for i586 and powerpc32.
If the build fails, simply delete the object file posix_build.o manually. git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16819 a95241bf-73f2-0310-859d-f6bbb57e9c96
This commit is contained in:
parent
7ca5966a15
commit
3b74cca68e
@ -118,6 +118,7 @@ MergeObject posix_gnu_arch_$(TARGET_ARCH)_other.o :
|
||||
ldbl2mpn.c
|
||||
lshift.S rshift.S
|
||||
mul_1.S
|
||||
strlen.S
|
||||
sub_n.S
|
||||
submul_1.S
|
||||
|
||||
|
159
src/system/libroot/posix/glibc/arch/ppc/strlen.S
Normal file
159
src/system/libroot/posix/glibc/arch/ppc/strlen.S
Normal file
@ -0,0 +1,159 @@
|
||||
/* Optimized strlen implementation for PowerPC.
|
||||
Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include <bp-sym.h>
|
||||
#include <bp-asm.h>
|
||||
|
||||
/* The algorithm here uses the following techniques:
|
||||
|
||||
1) Given a word 'x', we can test to see if it contains any 0 bytes
|
||||
by subtracting 0x01010101, and seeing if any of the high bits of each
|
||||
byte changed from 0 to 1. This works because the least significant
|
||||
0 byte must have had no incoming carry (otherwise it's not the least
|
||||
significant), so it is 0x00 - 0x01 == 0xff. For all other
|
||||
byte values, either they have the high bit set initially, or when
|
||||
1 is subtracted you get a value in the range 0x00-0x7f, none of which
|
||||
have their high bit set. The expression here is
|
||||
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
|
||||
there were no 0x00 bytes in the word.
|
||||
|
||||
2) Given a word 'x', we can test to see _which_ byte was zero by
|
||||
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
|
||||
This produces 0x80 in each byte that was zero, and 0x00 in all
|
||||
the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each
|
||||
byte, and the '| x' part ensures that bytes with the high bit set
|
||||
produce 0x00. The addition will carry into the high bit of each byte
|
||||
iff that byte had one of its low 7 bits set. We can then just see
|
||||
which was the most significant bit set and divide by 8 to find how
|
||||
many to add to the index.
|
||||
This is from the book 'The PowerPC Compiler Writer's Guide',
|
||||
by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
|
||||
|
||||
We deal with strings not aligned to a word boundary by taking the
|
||||
first word and ensuring that bytes not part of the string
|
||||
are treated as nonzero. To allow for memory latency, we unroll the
|
||||
loop a few times, being careful to ensure that we do not read ahead
|
||||
across cache line boundaries.
|
||||
|
||||
Questions to answer:
|
||||
1) How long are strings passed to strlen? If they're often really long,
|
||||
we should probably use cache management instructions and/or unroll the
|
||||
loop more. If they're often quite short, it might be better to use
|
||||
fact (2) in the inner loop than have to recalculate it.
|
||||
2) How popular are bytes with the high bit set? If they are very rare,
|
||||
on some processors it might be useful to use the simpler expression
|
||||
~((x - 0x01010101) | 0x7f7f7f7f) (that is, on processors with only one
|
||||
ALU), but this fails when any character has its high bit set. */
|
||||
|
||||
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
|
||||
0 and 3 through 12 (so long as we don't call any procedures) without
|
||||
saving them. We can also use registers 14 through 31 if we save them.
|
||||
We can't use r1 (it's the stack pointer), r2 nor r13 because the user
|
||||
program may expect them to hold their usual value if we get sent
|
||||
a signal. Integer parameters are passed in r3 through r10.
|
||||
We can use condition registers cr0, cr1, cr5, cr6, and cr7 without saving
|
||||
them, the others we must save. */
|
||||
|
||||
/* int [r3] strlen (char *s [r3]) */
|
||||
|
||||
/* strlen for PowerPC: r3 holds the string pointer on entry and the length on
   return (see the rRTN define below). The first word (and, when needed to
   reach a doubleword boundary, the second) is checked with method (2) from
   the algorithm comment earlier in this file; the main loop then checks two
   words per iteration with method (1) and only recomputes method (2) to
   locate the zero byte. ENTRY, END, BP_SYM, L() and CHECK_BOUNDS_LOW are
   macros that are not defined in this file. */
ENTRY (BP_SYM (strlen))
|
||||
|
||||
#define rTMP1 r0 /* scratch */
|
||||
#define rRTN r3 /* incoming STR arg, outgoing result */
|
||||
#define rSTR r4 /* current string position */
|
||||
#define rPADN r5 /* number of padding bits we prepend to the
|
||||
string to make it start at a word boundary */
|
||||
#define rFEFE r6 /* constant 0xfefefeff (-0x01010101) */
|
||||
#define r7F7F r7 /* constant 0x7f7f7f7f */
|
||||
#define rWORD1 r8 /* current string word */
|
||||
#define rWORD2 r9 /* next string word */
|
||||
#define rMASK r9 /* mask for first string word (shares r9 with rWORD2; last used before the loop) */
|
||||
#define rTMP2 r10 /* scratch */
|
||||
#define rTMP3 r11 /* scratch */
|
||||
#define rTMP4 r12 /* scratch */
|
||||
|
||||
CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2)
|
||||
|
||||
clrrwi rSTR, rRTN, 2 /* rSTR = rRTN with its low two bits cleared (word aligned) */
|
||||
lis r7F7F, 0x7f7f /* upper half of 0x7f7f7f7f */
|
||||
rlwinm rPADN, rRTN, 3, 27, 28 /* rPADN = (rRTN & 3) * 8 */
|
||||
lwz rWORD1, 0(rSTR) /* load the aligned word that contains the first string byte */
|
||||
li rMASK, -1 /* all ones */
|
||||
addi r7F7F, r7F7F, 0x7f7f /* lower half: r7F7F = 0x7f7f7f7f */
|
||||
/* That's the setup done, now do the first pair of words.
|
||||
We make an exception and use method (2) on the first two words, to reduce
|
||||
overhead. */
|
||||
srw rMASK, rMASK, rPADN /* zero the top rPADN mask bits (the prepended padding) */
|
||||
and rTMP1, r7F7F, rWORD1 /* x & 0x7f7f7f7f */
|
||||
or rTMP2, r7F7F, rWORD1 /* x | 0x7f7f7f7f */
|
||||
add rTMP1, rTMP1, r7F7F /* (x & 0x7f7f7f7f) + 0x7f7f7f7f */
|
||||
nor rTMP1, rTMP2, rTMP1 /* method (2): 0x80 in every byte of x that was zero */
|
||||
and. rWORD1, rTMP1, rMASK /* drop hits in the padding; sets cr0 for the bne below */
|
||||
mtcrf 0x01, rRTN /* low four bits of the original pointer -> cr7 (tested by bt 29 below) */
|
||||
bne L(done0) /* zero byte found in the first word */
|
||||
lis rFEFE, -0x101 /* upper half of 0xfefefeff */
|
||||
addi rFEFE, rFEFE, -0x101 /* rFEFE = 0xfefefeff */
|
||||
/* Are we now aligned to a doubleword boundary? */
|
||||
bt 29, L(loop) /* CR bit 29 = the bit with value 4 of the original pointer */
|
||||
|
||||
/* Handle second word of pair. */
|
||||
lwzu rWORD1, 4(rSTR) /* load the next word and advance rSTR to it */
|
||||
and rTMP1, r7F7F, rWORD1
|
||||
or rTMP2, r7F7F, rWORD1
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
nor. rWORD1, rTMP2, rTMP1 /* method (2) again, this time without a mask */
|
||||
bne L(done0) /* zero byte found in the second word */
|
||||
|
||||
/* The loop. */
|
||||
|
||||
L(loop):
|
||||
lwz rWORD1, 4(rSTR) /* first word of the pair */
|
||||
lwzu rWORD2, 8(rSTR) /* second word; rSTR now points at it */
|
||||
add rTMP1, rFEFE, rWORD1 /* x + 0xfefefeff */
|
||||
nor rTMP2, r7F7F, rWORD1 /* ~(x | 0x7f7f7f7f) */
|
||||
and. rTMP1, rTMP1, rTMP2 /* method (1) on rWORD1 */
|
||||
add rTMP3, rFEFE, rWORD2 /* start method (1) on rWORD2 before branching */
|
||||
nor rTMP4, r7F7F, rWORD2
|
||||
bne L(done1) /* zero byte is in rWORD1 */
|
||||
and. rTMP1, rTMP3, rTMP4 /* method (1) on rWORD2 */
|
||||
beq L(loop) /* no zero byte in either word */
|
||||
|
||||
and rTMP1, r7F7F, rWORD2 /* zero byte is in rWORD2: locate it with method (2) */
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
andc rWORD1, rTMP4, rTMP1 /* rTMP4 already holds ~(x | 0x7f7f7f7f) */
|
||||
b L(done0)
|
||||
|
||||
L(done1):
|
||||
and rTMP1, r7F7F, rWORD1 /* locate the zero byte in rWORD1 with method (2) */
|
||||
subi rSTR, rSTR, 4 /* step rSTR back from rWORD2 to rWORD1 */
|
||||
add rTMP1, rTMP1, r7F7F
|
||||
andc rWORD1, rTMP2, rTMP1 /* rTMP2 already holds ~(x | 0x7f7f7f7f) */
|
||||
|
||||
/* When we get to here, rSTR points to the first word in the string that
|
||||
contains a zero byte, and the most significant set bit in rWORD1 is in that
|
||||
byte. */
|
||||
L(done0):
|
||||
cntlzw rTMP3, rWORD1 /* number of zero bits above the first marker bit */
|
||||
subf rTMP1, rRTN, rSTR /* rTMP1 = rSTR - rRTN */
|
||||
srwi rTMP3, rTMP3, 3 /* bits -> bytes: index of the zero byte within the word */
|
||||
add rRTN, rTMP1, rTMP3 /* length = word offset + byte index */
|
||||
/* GKM FIXME: check high bound. */
|
||||
blr
|
||||
END (BP_SYM (strlen))
|
@ -101,6 +101,7 @@ MergeObject posix_gnu_arch_$(TARGET_ARCH)_other.o :
|
||||
mplog.c
|
||||
mul_1.S
|
||||
lshift.S rshift.S
|
||||
strlen.S
|
||||
sub_n.S
|
||||
submul_1.S
|
||||
;
|
||||
|
188
src/system/libroot/posix/glibc/arch/x86/strlen.S
Normal file
188
src/system/libroot/posix/glibc/arch/x86/strlen.S
Normal file
@ -0,0 +1,188 @@
|
||||
/* strlen -- Compute length of NUL terminated string.
|
||||
Highly optimized version for ix86, x>=5.
|
||||
Copyright (C) 1995, 1996, 1997, 2000, 2002 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, write to the Free
|
||||
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||
02111-1307 USA. */
|
||||
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
#include "bp-sym.h"
|
||||
#include "bp-asm.h"
|
||||
|
||||
/* This version is especially optimized for the i586 (and following?)
|
||||
processors. This is mainly done by using the two pipelines. The
|
||||
version optimized for i486 is weak in this aspect because to get
|
||||
as much parallelism we have to execute some *more* instructions.
|
||||
|
||||
The code below is structured to reflect the pairing of the instructions
|
||||
as *I think* it is. I have no processor data book to verify this.
|
||||
If you find something you think is incorrect let me know. */
|
||||
|
||||
|
||||
/* The magic value which is used throughout the code. */
|
||||
#define magic 0xfefefeff /* added to every word; ~magic (0x01010100) selects the bits tested after the add */
|
||||
|
||||
/* NOTE(review): LINKAGE is not defined in this file - presumably it comes
   from one of the included headers; confirm. */
#define PARMS LINKAGE /* no space for saved regs */
|
||||
#define STR PARMS /* %esp-relative offset from which strlen loads its string pointer */
|
||||
|
||||
.text
|
||||
/* strlen for i586 and later: loads the string pointer from STR(%esp) into
   %eax, checks single bytes until %eax is 4-byte aligned, then scans one
   32-bit word at a time (loop unrolled four times) with the magic-value test
   explained in the comment before L(1). The result returned in %eax is the
   address of the NUL byte minus the start of the string.
   ENTRY, END, ENTER, LEAVE, BP_SYM, L() and CHECK_BOUNDS_* are macros that
   are not defined in this file. */
ENTRY (BP_SYM (strlen))
|
||||
ENTER
|
||||
|
||||
movl STR(%esp), %eax /* %eax = pointer to the string */
|
||||
CHECK_BOUNDS_LOW (%eax, STR(%esp))
|
||||
movl $3, %edx /* load mask (= 3) */
|
||||
|
||||
andl %eax, %edx /* separate last two bits of address */
|
||||
|
||||
jz L(1) /* aligned => start loop */
|
||||
jp L(0) /* exactly two bits set */
|
||||
|
||||
cmpb %dh, (%eax) /* is byte NUL? */
|
||||
je L(2) /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
cmpb %dh, (%eax) /* is byte NUL? */
|
||||
|
||||
je L(2) /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
xorl $2, %edx /* %edx is 1 or 2 here; it becomes 0 only if it was 2 */
|
||||
|
||||
jz L(1) /* started two bytes past alignment => aligned now */
|
||||
|
||||
L(0): cmpb %dh, (%eax) /* is byte NUL? */
|
||||
je L(2) /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
xorl %edx, %edx /* We need %edx == 0 for later */
|
||||
|
||||
/* We exit the loop if adding MAGIC_BITS to LONGWORD fails to
|
||||
change any of the hole bits of LONGWORD.
|
||||
|
||||
1) Is this safe? Will it catch all the zero bytes?
|
||||
Suppose there is a byte with all zeros. Any carry bits
|
||||
propagating from its left will fall into the hole at its
|
||||
least significant bit and stop. Since there will be no
|
||||
carry from its most significant bit, the LSB of the
|
||||
byte to the left will be unchanged, and the zero will be
|
||||
detected.
|
||||
|
||||
2) Is this worthwhile? Will it ignore everything except
|
||||
zero bytes? Suppose every byte of LONGWORD has a bit set
|
||||
somewhere. There will be a carry into bit 8. If bit 8
|
||||
is set, this will carry into bit 16. If bit 8 is clear,
|
||||
one of bits 9-15 must be set, so there will be a carry
|
||||
into bit 16. Similarly, there will be a carry into bit
|
||||
24. If one of bits 24-31 is set, there will be a carry
|
||||
into bit 32 (=carry flag), so all of the hole bits will
|
||||
be changed.
|
||||
|
||||
Note: %edx == 0 in any case here. */
|
||||
|
||||
L(1):
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word */
|
||||
jnc L(3) /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
|
||||
andl $~magic, %edx /* any of the carry flags set? */
|
||||
|
||||
jne L(3) /* yes => determine byte */
|
||||
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word */
|
||||
jnc L(3) /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
|
||||
andl $~magic, %edx /* any of the carry flags set? */
|
||||
|
||||
jne L(3) /* yes => determine byte */
|
||||
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word */
|
||||
jnc L(3) /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
|
||||
andl $~magic, %edx /* any of the carry flags set? */
|
||||
|
||||
jne L(3) /* yes => determine byte */
|
||||
|
||||
|
||||
movl (%eax), %ecx /* get word (= 4 bytes) in question */
|
||||
addl $4, %eax /* adjust pointer for *next* word */
|
||||
|
||||
subl %ecx, %edx /* first step to negate word */
|
||||
addl $magic, %ecx /* add magic word */
|
||||
|
||||
decl %edx /* complete negation of word */
|
||||
jnc L(3) /* previous addl caused overflow? */
|
||||
|
||||
xorl %ecx, %edx /* (word+magic)^word */
|
||||
|
||||
andl $~magic, %edx /* any of the carry flags set? */
|
||||
|
||||
je L(1) /* no => start loop again */
|
||||
|
||||
|
||||
L(3): subl $4, %eax /* correct too early pointer increment */
|
||||
subl $magic, %ecx /* undo the addl: %ecx holds the loaded word again */
|
||||
|
||||
cmpb $0, %cl /* lowest byte NUL? */
|
||||
jz L(2) /* yes => return */
|
||||
|
||||
inc %eax /* increment pointer */
|
||||
testb %ch, %ch /* second byte NUL? */
|
||||
|
||||
jz L(2) /* yes => return */
|
||||
|
||||
shrl $16, %ecx /* make upper bytes accessible */
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
cmpb $0, %cl /* is third byte NUL? */
|
||||
jz L(2) /* yes => return */
|
||||
|
||||
incl %eax /* increment pointer */
|
||||
|
||||
L(2): CHECK_BOUNDS_HIGH (%eax, STR(%esp), jb)
|
||||
subl STR(%esp), %eax /* now compute the length as difference
|
||||
between start and terminating NUL
|
||||
character */
|
||||
LEAVE
|
||||
ret
|
||||
END (BP_SYM (strlen))
|
@ -25,7 +25,6 @@ MergeObject posix_string.o :
|
||||
strerror.c
|
||||
strlcat.c
|
||||
strlcpy.c
|
||||
strlen.c
|
||||
strncat.c
|
||||
strncmp.c
|
||||
strncpy.c
|
||||
|
@ -1,20 +0,0 @@
|
||||
/*
|
||||
** Copyright 2001, Travis Geiselbrecht. All rights reserved.
|
||||
** Distributed under the terms of the NewOS License.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/*
** Returns the number of characters in the NUL-terminated string s, not
** counting the terminating NUL itself. s must point to a valid string;
** a NULL pointer is not checked for.
*/
size_t
strlen(char const *s)
{
	char const *end = s;

	/* Advance to the terminating NUL; the distance walked is the length. */
	while (*end != '\0') {
		end++;
	}

	return (size_t)(end - s);
}
|
Loading…
Reference in New Issue
Block a user