swap r1 & ip

Use the flag-setting instruction forms (adds, eors, etc.).
Replace teq with cmp.
This commit is contained in:
matt 2013-08-20 08:05:49 +00:00
parent 7eb7e9aa67
commit b628072a9d
1 changed file with 39 additions and 33 deletions

View File

@ -29,7 +29,7 @@
#include <machine/asm.h>
RCSID("$NetBSD: strrchr_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $")
RCSID("$NetBSD: strrchr_arm.S,v 1.5 2013/08/20 08:05:49 matt Exp $")
#ifdef __ARMEL__
#define BYTE0 0x000000ff
@ -37,49 +37,52 @@ RCSID("$NetBSD: strrchr_arm.S,v 1.4 2013/08/11 04:56:32 matt Exp $")
#define BYTE2 0x00ff0000
#define BYTE3 0xff000000
#define lshi lsl
#define lshis lsls
#else
#define BYTE0 0xff000000
#define BYTE1 0x00ff0000
#define BYTE2 0x0000ff00
#define BYTE3 0x000000ff
#define lshi lsr
#define lshis lsrs
#endif
.text
ENTRY(strrchr)
teq r1, #0 /* searching for NUL? */
ands r2, r1, #0xff /* is the byte value NUL? */
bne 1f /* no, do it the hard way */
push {r0, lr} /* save pointer and return addr */
bl PLT_SYM(strlen) /* get length */
pop {r1, lr} /* restore pointer and returna addr */
add r0, r0, r1 /* add pointer to length */
RET /* return */
pop {r1, r2} /* restore pointer / return addr */
adds r0, r0, r1 /* add pointer to length */
RETr(r2) /* return */
1: mov ip, r0 /* we use r0 at the return value */
mov r0, #0 /* return NULL by default */
and r2, r1, #0xff /* restrict to byte value */
2: tst ip, #3 /* test for word alignment */
1: mov r1, r0 /* we use r0 at the return value */
movs r0, #0 /* return NULL by default */
2: tst r1, #3 /* test for word alignment */
beq .Lpre_main_loop /* finally word aligned */
ldrb r3, [ip], #1 /* load a byte */
ldrb r3, [r1], #1 /* load a byte */
cmp r3, r2 /* did it match? */
subeq r0, ip, #1 /* yes, remember that it did */
teq r3, #0 /* was it NUL? */
#ifdef __thumb__
it eq
subeq r0, r1, #1 /* yes, remember that it did */
#endif
cmp r3, #0 /* was it NUL? */
bne 2b /* no, try next byte */
RET /* return */
.Lpre_main_loop:
push {r4, r5} /* save some registers */
#if defined(_ARM_ARCH_7)
movw r1, #0xfefe /* magic constant; 254 in each byte */
movt r1, #0xfefe /* magic constant; 254 in each byte */
movw ip, #0xfefe /* magic constant; 254 in each byte */
movt ip, #0xfefe /* magic constant; 254 in each byte */
#elif defined(_ARM_ARCH_6)
mov r1, #0xfe /* put 254 in low byte */
orr r1, r1, r1, lsl #8 /* move to next byte */
orr r1, r1, r1, lsl #16 /* move to next halfword */
mov ip, #0xfe /* put 254 in low byte */
orr ip, ip, ip, lsl #8 /* move to next byte */
orr ip, ip, ip, lsl #16 /* move to next halfword */
#endif /* _ARM_ARCH_6 */
orr r2, r2, r2, lsl #8 /* move to next byte */
orr r2, r2, r2, lsl #16 /* move to next halfword */
.Lmain_loop:
ldr r3, [ip], #4 /* load next word */
ldr r3, [r1], #4 /* load next word */
#if defined(_ARM_ARCH_6)
/*
* Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
@ -87,10 +90,10 @@ ENTRY(strrchr)
* become 255. For NUL, it will be 254. When we complement the
* result, if the result is non-0 then we must have encountered a NUL.
*/
uqadd8 r4, r3, r1 /* NUL detection happens here */
uqadd8 r4, r3, ip /* NUL detection happens here */
usub8 r3, r3, r2 /* bias for char looked for? */
uqadd8 r5, r3, r1 /* char detection happens here */
and r3, r4, r5 /* merge results */
uqadd8 r5, r3, ip /* char detection happens here */
ands r3, r4, r5 /* merge results */
mvns r3, r3 /* is the complement non-0? */
beq .Lmain_loop /* no, then keep going */
@ -104,7 +107,10 @@ ENTRY(strrchr)
*/
bics r5, r5, r4 /* clear any NUL match bits */
beq .Ldone /* no remaining matches, we're done */
movs r3, r4, lshi #8 /* shift up a byte */
lshis r3, r4, #8 /* shift up a byte */
#ifdef __thumb__
itt ne
#endif
orrsne r3, r3, r3, lshi #8 /* if non 0, copy up to next byte */
orrsne r3, r3, r3, lshi #8 /* if non 0, copy up to last byte */
bics r5, r5, r3 /* clear match bits */
@ -117,13 +123,13 @@ ENTRY(strrchr)
* If we have multiple matches, we want to select the "last" match
* in the word which will be the lowest bit set.
*/
sub r3, r5, #1 /* subtract 1 */
and r3, r3, r5 /* and with mask */
eor r5, r5, r3 /* only have the lowest bit set left */
subs r3, r5, #1 /* subtract 1 */
ands r3, r3, r5 /* and with mask */
eors r5, r5, r3 /* only have the lowest bit set left */
clz r5, r5 /* count how many leading zeros */
add r0, ip, r5, lsr #3 /* divide that by 8 and add to count */
sub r0, r0, #4 /* compensate for the post-inc */
teq r4, #0 /* did we read any NULs? */
add r0, r1, r5, lsr #3 /* divide that by 8 and add to count */
subs r0, r0, #4 /* compensate for the post-inc */
cmp r4, #0 /* did we read any NULs? */
beq .Lmain_loop /* no, get next word */
#else
/*
@ -134,19 +140,19 @@ ENTRY(strrchr)
tst r3, #BYTE0 /* is byte 0 a NUL? */
beq .Ldone /* yes, then we're done */
tst r4, #BYTE0 /* is byte 0 a match? */
subeq r0, ip, #4 /* yes, remember its location */
subeq r0, r1, #4 /* yes, remember its location */
tst r3, #BYTE1 /* is byte 1 a NUL? */
beq .Ldone /* yes, then we're done */
tst r4, #BYTE1 /* is byte 1 a match? */
subeq r0, ip, #3 /* yes, remember its location */
subeq r0, r1, #3 /* yes, remember its location */
tst r3, #BYTE2 /* is byte 2 a NUL? */
beq .Ldone /* yes, then we're done */
tst r4, #BYTE2 /* is byte 2 a match? */
subeq r0, ip, #2 /* yes, remember its location */
subeq r0, r1, #2 /* yes, remember its location */
tst r3, #BYTE3 /* is byte 3 a NUL? */
beq .Ldone /* yes, then we're done */
tst r4, #BYTE3 /* is byte 3 a match? */
subeq r0, ip, #1 /* yes, remember its location */
subeq r0, r1, #1 /* yes, remember its location */
b .Lmain_loop
#endif /* _ARM_ARCH_6 */
.Ldone: