Make this work. Various fixes and some further optimizations.

This commit is contained in:
matt 2012-12-20 07:18:33 +00:00
parent 6fd79daca4
commit 69105f68aa
1 changed files with 65 additions and 20 deletions

View File

@ -29,7 +29,7 @@
#include <machine/asm.h>
RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.1 2012/12/19 15:05:16 matt Exp $")
RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.2 2012/12/20 07:18:33 matt Exp $")
/*
* Special note:
@ -66,7 +66,7 @@ ENTRY(cpu_in_cksum_buffer)
#ifndef __OPTIMIZE_SIZE__
rsb r3, r3, #64 /* subtract from 64 */
#ifdef _ARM_ARCH_DWORD_OK
add r3, r3, r1, lsr #1 /* multiply by 1.5 */
add r3, r3, r3, lsr #1 /* multiply by 1.5 */
add pc, pc, r3 /* and jump! */
#else
add pc, pc, r3, lsl #1 /* multiply by 2 and jump! */
@ -97,6 +97,7 @@ ENTRY(cpu_in_cksum_buffer)
LOAD_DWORD_INTO_R4(r0) /* 1 dword left */
.Ladd_one_dword:
adcs ip, ip, r4
.Ladd_one_word:
adcs ip, ip, r5
teq r2, r0 /* nothing left? */
beq .Lfold /* yep, proceed to hold */
@ -107,13 +108,14 @@ ENTRY(cpu_in_cksum_buffer)
bne 4b /* yep, do 64 at time */
#endif
bics r3, r1, #7 /* at least 8 bytes left? */
bge 3b /* yep, do them */
bne 3b /* yep, do them */
.Lfinal_dword:
tst r1, #4 /* more than one word more left? */
sub r3, r1, #1 /* 0-3 = 1 word, 4-7 = 2 words */
tst r3, #4 /* more than one word more left? */
moveq r4, #0 /* no, just use zero */
ldrne r4, [r0], #4 /* yes, load first word */
ldr r5, [r0] /* load last word */
ldreq r5, [r0] /* no, load last word */
ldmneia r0, {r4-r5} /* yes, load last dword */
.Lfinal_dword_noload:
rsb r1, r1, #4 /* find out many bytes to discard */
#ifdef __ARMEL__
@ -127,7 +129,9 @@ ENTRY(cpu_in_cksum_buffer)
tst r1, #1 /* discard odd? */
bicne r5, r5, #0x000000ff /* yes, discard odd byte */
#endif
adds ip, ip, r4 /* add 1st to accumulator */
.Lfinal_add_one_dword:
adcs ip, ip, r4 /* add 1st to accumulator */
.Lfinal_add_one_word:
adcs ip, ip, r5 /* add 2nd to accumulator */
/*
@ -143,33 +147,58 @@ ENTRY(cpu_in_cksum_buffer)
#include "cpu_in_cksum_fold.S"
.Ldword_misaligned:
tst r0, #3 /* are at least word aligned? */
bne .Lword_misaligned /* no, do it the hard way */
ldr r5, [r0], #4 /* load word here in case of partial */
sub r1, r1, #4 /* subtract length of one word */
teq r1, #0 /* what is length? */
beq .Lfinal_add_one_word /* = 0? just do the final add */
addgt r2, r1, r0 /* > 0? point r2 just past end */
bgt .Ladd_one_word /* > 0? accumulate it and loop */
mov r4, #0 /* < 0? zero this */
b .Lfinal_dword_noload /* < 0? handle final partial dword */
.Lword_misaligned:
tst r0, #4 /* do we load 1 or 2 words? */
bic r0, r0, #3 /* force word alignment */
add r1, r1, r2 /* add misalignment to length */
tst r2, #4 /* first */
ldr r4, [r0], #4 /* load first word */
add r1, r1, r2 /* add initial offset to length */
sub r1, r1, #8 /* subtract length of one dword */
ldmeqia r0!, {r4-r5} /* load first dword */
ldrne r4, [r0], #4 /* load first word */
movne r5, #0 /* no second word */
ldreq r5, [r0], #4 /* load second word */
/*
* We are now dword aligned.
*/
#ifdef __ARMEL__
tst r2, #2 /* discard at least 2? */
movne r4, r4, lsr #16 /* yes, discard lower halfword */
tst r2, #1 /* discard odd? */
bicne r4, r4, #0x0000ff00 /* yes, discard odd byte */
tst r2, #1 /* start odd? */
bicne r4, r4, #0x000000ff /* yes, discard even byte */
#else
tst r2, #2 /* discard at least 2? */
movne r4, r4, lsl #16 /* yes, discard upper halfword */
tst r2, #1 /* discard odd? */
bicne r4, r4, #0x00ff0000 /* yes, discard odd byte */
tst r2, #1 /* start odd? */
bicne r4, r4, #0xff000000 /* yes, discard even byte */
#endif
/*
* Since we started on an odd boundary, set up our stack frame so we
* fixup the return value to be byteswapped.
*/
ldrne r3, [sp, #4] /* pop r5 */
strne r3, [sp, #-4]! /* push it again */
ldrne r3, [sp, #4] /* pop r4 */
strne r3, [sp, #-4]! /* push it again */
strne lr, [sp, #8] /* save our return address */
adrne lr, .Lmisaligned_fixup /* use new to fixup the return value */
/*
* See if we have at least a full dword to process. If we do, jump
* into the main loop as if we had just loaded a single dword.
*/
bics r3, r1, #7 /* at least one dword? */
addne r2, r1, r0 /* yes, point r2 just past end */
bne .Ladd_one_dword /* yes, accumulate it and loop */
teq r1, #0 /* what is length? */
beq .Lfinal_add_one_word /* = 0? just do the final add */
addgt r2, r1, r0 /* > 0? point r2 just past end */
bgt .Ladd_one_dword /* > 0? accumulate it and loop */
/*
* Not a full dword so do the final dword processing to find out
* bytes to discard. If we only loaded one word, move it to 2nd
@ -177,7 +206,23 @@ ENTRY(cpu_in_cksum_buffer)
* clear the 1st word.
*/
tst r2, #4 /* one or two words? */
movne r5, r4 /* one, move 1st word to 2nd word */
movne r4, #0 /* and clear 1st word */
moveq r5, r4 /* one, move 1st word to 2nd word */
moveq r4, #0 /* and clear 1st word */
b .Lfinal_dword_noload /* handle final dword */
/*
 * If we started on an odd address, the return value has to be byte
 * swapped. Instead of testing for that everywhere, a fake call frame
 * was inserted and LR was pointed here, so this code fixes up the
 * result and then returns to the real caller.
 */
.Lmisaligned_fixup:
	ldr	lr, [sp], #8		/* fetch saved LR, drop fake frame */
#ifdef _ARM_ARCH_6
	rev16	r0, r0			/* byte swap each halfword */
#else
	/*
	 * Swap the two low bytes of r0 by hand. MOV takes a single
	 * shifted source operand, so the rotate/shift is applied to r0
	 * directly.
	 * NOTE(review): the result is only a clean 16-bit swap when
	 * bits 16:31 of r0 are zero on entry -- presumably guaranteed by
	 * the fold code; confirm.
	 */
	mov	r0, r0, ror #8		/* move 0:7 to 24:31 and 8:15 to 0:7 */
	orr	r0, r0, r0, lsl #16	/* copy new 0:7 to 16:23 */
	mov	r0, r0, lsr #16		/* move 16:31 down to 0:15 */
#endif
	RET
END(cpu_in_cksum_buffer)