Make this work. Various fixes and some further optimizations.
This commit is contained in:
parent
6fd79daca4
commit
69105f68aa
|
@ -29,7 +29,7 @@
|
|||
|
||||
#include <machine/asm.h>
|
||||
|
||||
RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.1 2012/12/19 15:05:16 matt Exp $")
|
||||
RCSID("$NetBSD: cpu_in_cksum_buffer.S,v 1.2 2012/12/20 07:18:33 matt Exp $")
|
||||
|
||||
/*
|
||||
* Special note:
|
||||
|
@ -66,7 +66,7 @@ ENTRY(cpu_in_cksum_buffer)
|
|||
#ifndef __OPTIMIZE_SIZE__
|
||||
rsb r3, r3, #64 /* subtract from 64 */
|
||||
#ifdef _ARM_ARCH_DWORD_OK
|
||||
add r3, r3, r1, lsr #1 /* multiply by 1.5 */
|
||||
add r3, r3, r3, lsr #1 /* multiply by 1.5 */
|
||||
add pc, pc, r3 /* and jump! */
|
||||
#else
|
||||
add pc, pc, r3, lsl #1 /* multiply by 2 and jump! */
|
||||
|
@ -97,6 +97,7 @@ ENTRY(cpu_in_cksum_buffer)
|
|||
LOAD_DWORD_INTO_R4(r0) /* 1 dword left */
|
||||
.Ladd_one_dword:
|
||||
adcs ip, ip, r4
|
||||
.Ladd_one_word:
|
||||
adcs ip, ip, r5
|
||||
teq r2, r0 /* nothing left? */
|
||||
beq .Lfold /* yep, proceed to hold */
|
||||
|
@ -107,13 +108,14 @@ ENTRY(cpu_in_cksum_buffer)
|
|||
bne 4b /* yep, do 64 at time */
|
||||
#endif
|
||||
bics r3, r1, #7 /* at least 8 bytes left? */
|
||||
bge 3b /* yep, do them */
|
||||
bne 3b /* yep, do them */
|
||||
|
||||
.Lfinal_dword:
|
||||
tst r1, #4 /* more than one word more left? */
|
||||
sub r3, r1, #1 /* 0-3 = 1 word, 4-7 = 2 words */
|
||||
tst r3, #4 /* more than one word more left? */
|
||||
moveq r4, #0 /* no, just use zero */
|
||||
ldrne r4, [r0], #4 /* yes, load first word */
|
||||
ldr r5, [r0] /* load last word */
|
||||
ldreq r5, [r0] /* no, load last word */
|
||||
ldmneia r0, {r4-r5} /* yes, load last dword */
|
||||
.Lfinal_dword_noload:
|
||||
rsb r1, r1, #4 /* find out many bytes to discard */
|
||||
#ifdef __ARMEL__
|
||||
|
@ -127,7 +129,9 @@ ENTRY(cpu_in_cksum_buffer)
|
|||
tst r1, #1 /* discard odd? */
|
||||
bicne r5, r5, #0x000000ff /* yes, discard odd byte */
|
||||
#endif
|
||||
adds ip, ip, r4 /* add 1st to accumulator */
|
||||
.Lfinal_add_one_dword:
|
||||
adcs ip, ip, r4 /* add 1st to accumulator */
|
||||
.Lfinal_add_one_word:
|
||||
adcs ip, ip, r5 /* add 2nd to accumulator */
|
||||
|
||||
/*
|
||||
|
@ -143,33 +147,58 @@ ENTRY(cpu_in_cksum_buffer)
|
|||
#include "cpu_in_cksum_fold.S"
|
||||
|
||||
.Ldword_misaligned:
|
||||
tst r0, #3 /* are at least word aligned? */
|
||||
bne .Lword_misaligned /* no, do it the hard way */
|
||||
ldr r5, [r0], #4 /* load word here in case of partial */
|
||||
sub r1, r1, #4 /* subtract length of one word */
|
||||
teq r1, #0 /* what is length? */
|
||||
beq .Lfinal_add_one_word /* = 0? just do the final add */
|
||||
addgt r2, r1, r0 /* > 0? point r2 just past end */
|
||||
bgt .Ladd_one_word /* > 0? accumulate it and loop */
|
||||
mov r4, #0 /* < 0? zero this */
|
||||
b .Lfinal_dword_noload /* < 0? handle final partial dword */
|
||||
|
||||
.Lword_misaligned:
|
||||
tst r0, #4 /* do we load 1 or 2 words? */
|
||||
bic r0, r0, #3 /* force word alignment */
|
||||
add r1, r1, r2 /* add misalignment to length */
|
||||
tst r2, #4 /* first */
|
||||
ldr r4, [r0], #4 /* load first word */
|
||||
add r1, r1, r2 /* add initial offset to length */
|
||||
sub r1, r1, #8 /* subtract length of one dword */
|
||||
ldmeqia r0!, {r4-r5} /* load first dword */
|
||||
ldrne r4, [r0], #4 /* load first word */
|
||||
movne r5, #0 /* no second word */
|
||||
ldreq r5, [r0], #4 /* load second word */
|
||||
/*
|
||||
* We are now dword aligned.
|
||||
*/
|
||||
#ifdef __ARMEL__
|
||||
tst r2, #2 /* discard at least 2? */
|
||||
movne r4, r4, lsr #16 /* yes, discard lower halfword */
|
||||
tst r2, #1 /* discard odd? */
|
||||
bicne r4, r4, #0x0000ff00 /* yes, discard odd byte */
|
||||
tst r2, #1 /* start odd? */
|
||||
bicne r4, r4, #0x000000ff /* yes, discard even byte */
|
||||
#else
|
||||
tst r2, #2 /* discard at least 2? */
|
||||
movne r4, r4, lsl #16 /* yes, discard upper halfword */
|
||||
tst r2, #1 /* discard odd? */
|
||||
bicne r4, r4, #0x00ff0000 /* yes, discard odd byte */
|
||||
tst r2, #1 /* start odd? */
|
||||
bicne r4, r4, #0xff000000 /* yes, discard even byte */
|
||||
#endif
|
||||
/*
|
||||
* Since we started on an odd boundary, set up our stack frame so we
|
||||
* fixup the return value to be byteswapped.
|
||||
*/
|
||||
ldrne r3, [sp, #4] /* pop r5 */
|
||||
strne r3, [sp, #-4]! /* push it again */
|
||||
ldrne r3, [sp, #4] /* pop r4 */
|
||||
strne r3, [sp, #-4]! /* push it again */
|
||||
strne lr, [sp, #8] /* save our return address */
|
||||
adrne lr, .Lmisaligned_fixup /* use new to fixup the return value */
|
||||
/*
|
||||
* See if we have at least a full dword to process.  If we do, jump
|
||||
* into the main loop as if we had just loaded a single dword.
|
||||
*/
|
||||
bics r3, r1, #7 /* at least one dword? */
|
||||
addne r2, r1, r0 /* yes, point r2 just past end */
|
||||
bne .Ladd_one_dword /* yes, accumulate it and loop */
|
||||
teq r1, #0 /* what is length? */
|
||||
beq .Lfinal_add_one_word /* = 0? just do the final add */
|
||||
addgt r2, r1, r0 /* > 0? point r2 just past end */
|
||||
bgt .Ladd_one_dword /* > 0? accumulate it and loop */
|
||||
|
||||
/*
|
||||
* Not a full dword so do the final dword processing to find out
|
||||
* bytes to discard. If we only loaded one word, move it to 2nd
|
||||
|
@ -177,7 +206,23 @@ ENTRY(cpu_in_cksum_buffer)
|
|||
* clear the 1st word.
|
||||
*/
|
||||
tst r2, #4 /* one or two words? */
|
||||
movne r5, r4 /* one, move 1st word to 2nd word */
|
||||
movne r4, #0 /* and clear 1st word */
|
||||
moveq r5, r4 /* one, move 1st word to 2nd word */
|
||||
moveq r4, #0 /* and clear 1st word */
|
||||
b .Lfinal_dword_noload /* handle final dword */
|
||||
|
||||
/*
|
||||
* If we had an odd address, we have to byte swap the return value.
|
||||
* Instead of testing everywhere, we inserted a fake call frame and
|
||||
* set LR to return to do the fixup and return to the caller.
|
||||
*/
|
||||
.Lmisaligned_fixup:
	/*
	 * Return trampoline used when the buffer started on an odd address.
	 * The misaligned entry path points LR at this label and stores the
	 * caller's return address in the extra stack slots it pushed, so we
	 * reload LR from the stack, byte swap the result in r0 and return.
	 * NOTE(review): the non-ARMv6 sequence assumes the folded sum lives
	 * in bits 0:15 of r0 -- confirm against cpu_in_cksum_fold.S.
	 */
	ldr	lr, [sp], #8		/* fetch saved LR, release the 8 bytes */
#ifdef _ARM_ARCH_6
	rev16	r0, r0			/* byte swap */
#else
	/*
	 * MOV takes one destination and a single (optionally shifted)
	 * source operand, so the rotate/shift is applied to r0 directly.
	 */
	mov	r0, r0, ror #8		/* move 0:7 to 24:31 and 8:15 to 0:7 */
	orr	r0, r0, r0, lsl #16	/* copy 0:7 to 16:23 */
	mov	r0, r0, lsr #16		/* move 16:31 down to 0:15 */
#endif
	RET
|
||||
END(cpu_in_cksum_buffer)
|
||||
|
|
Loading…
Reference in New Issue