Add a fast path for write_region_2 (iobus_bs_wr_2). It is just the fast
read_region_2 code with different inner loops.
This commit is contained in:
parent
ca8f51da8c
commit
d98bf2904c
@ -1,4 +1,4 @@
|
||||
/* $NetBSD: bus_asm.S,v 1.3 2006/10/03 22:27:02 bjh21 Exp $ */
|
||||
/* $NetBSD: bus_asm.S,v 1.4 2006/10/03 23:15:18 bjh21 Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2006 Ben Harris
|
||||
@ -303,27 +303,72 @@ ENTRY(iobus_bs_wr_1)
|
||||
|
||||
/*
 * iobus_bs_wr_2
 *
 * Reads 16-bit values a byte at a time (low byte first) from the buffer
 * whose address arrives in r3 and stores each one, duplicated into both
 * halves of a 32-bit word, to successive destination addresses.
 * Presumably this backs bus_space write_region_2 -- confirm against the
 * caller.
 *
 * NOTE(review): this text is a rendered diff hunk (-303,27 +303,72) whose
 * +/- markers have been lost, so removed and added lines are interleaved.
 * Where a line using r12, r3 or r2 is followed by an equivalent line using
 * r8, r9, r7 or r10, the first is the removed line and the second is its
 * replacement.
 *
 * Register use in the new version, as set up by the code below:
 *	r8  = destination address, r1 + (r2 << r0)
 *	r9  = source pointer (copy of incoming r3)
 *	r0  = destination step in bytes, 1 << (incoming r0)
 *	r10 = number of 16-bit items left for the fast loop
 *	r7  = number of 16-bit items for the slow loop
 */
ENTRY(iobus_bs_wr_2)
|
||||
mov ip, sp
|
||||
stmfd sp!, {r4, fp, ip, lr, pc}
|
||||
/* New version saves r4-r10 as well: 11 registers, 44 bytes of frame. */
stmfd sp!, {r4-r10, fp, ip, lr, pc}
|
||||
sub fp, ip, #4
|
||||
add r12, r1, r2, lsl r0
|
||||
add r8, r1, r2, lsl r0
|
||||
mov r9, r3
|
||||
mov r2, #1
|
||||
mov r0, r2, lsl r0
|
||||
ldr r2, [sp, #20]
|
||||
/*
 * sp + 44 is the word that was at the caller's sp on entry (44 bytes were
 * pushed above); presumably the item count passed on the stack.
 */
ldr r10, [sp, #44]
|
||||
|
||||
/*
 * Split the count between the slow loop (r7) and the fast loop (r10).
 * NOTE(review): the alignment arithmetic halves a byte distance, so it
 * appears to assume r9 is at least 2-byte aligned -- confirm.
 */
teq r0, #4 /* Is the step 4? */
|
||||
movne r7, r10 /* If not, do the whole lot slowly. */
|
||||
rsbeq r7, r9, #0 /* Otherwise, go slowly to a */
|
||||
andeq r7, r7, #15 /* 16-byte boundary. */
|
||||
moveq r7, r7, lsr #1 /* Convert to uint16_ts */
|
||||
sub r10, r10, r7 /* Adjust fast transfer len to match */
|
||||
cmp r10, #8
|
||||
addlt r7, r7, r10 /* If remaining fast xfer is <8 */
|
||||
movlt r10, #0 /* make it zero and correct r7. */
|
||||
|
||||
/* Make sure that we have a positive length */
|
||||
cmp r2, #0x00000000
|
||||
ldmledb fp, {fp, sp, pc}
|
||||
/*
 * Replacement for the cmp/ldmle pair above: when the slow-loop count is
 * zero, skip the slow loop and go to the first "2:" label below.
 * NOTE(review): unlike the removed pair, this does not reject a count with
 * the top bit set; in that case the slow loop body would run once before
 * its bgt falls through -- confirm callers never pass such a count.
 */
teq r7, #0
|
||||
beq 2f
|
||||
|
||||
1:
|
||||
ldrb r1, [r3], #0x0001
|
||||
ldrb r4, [r3], #1
|
||||
/*
 * Slow loop (new version): one 16-bit item per pass.  Two byte loads are
 * combined low byte first, the halfword is copied into the top half of
 * the word, and the word is stored with r8 post-incremented by the step.
 */
1: ldrb r1, [r9], #0x0001
|
||||
ldrb r4, [r9], #1
|
||||
orr r1, r1, r4, lsl #8
|
||||
orr r1, r1, r1, lsl #16
|
||||
str r1, [r12], r0
|
||||
subs r2, r2, #0x00000001
|
||||
str r1, [r8], r0
|
||||
subs r7, r7, #1
|
||||
bgt 1b
|
||||
|
||||
ldmdb fp, {r4, fp, sp, pc}
|
||||
/* Slow loop done (or skipped): return unless items remain for the fast loop. */
2: teq r10, #0
|
||||
ldmeqdb fp, {r4-r10, fp, sp, pc}
|
||||
|
||||
/*
|
||||
* Fast write_region_2 code. This is at its best when dealing with
|
||||
* 16-byte-aligned blocks of memory, which is arranged by the code
|
||||
* above.
|
||||
*
|
||||
* The EOR trick goes:
|
||||
* rH = (H)(L)
|
||||
* eor rL, rH, rH, lsl #16 rL = (H^L)(L)
|
||||
* eor rH, rH, rL, lsr #16 rH = (H)(L^H^L) = (H)(H)
|
||||
* eor rL, rL, rH, lsl #16 rL = (H^L^H)(L) = (L)(L)
|
||||
*/
|
||||
/*
 * Bias the count by -7 so that the bgt at the bottom of the loop repeats
 * only while at least eight items remain.
 */
sub r10, r10, #7
|
||||
/*
 * Fast loop: each pass loads four words (eight 16-bit items) from r9 and
 * stores eight words through r8.  The odd registers play rH and the even
 * registers rL in the EOR trick described above.  This second "2:" is the
 * target of the "bgt 2b" below.
 */
2: ldmia r9!, {r1, r3, r5, r7}
|
||||
subs r10, r10, #8
|
||||
eor r0, r1, r1, lsl #16
|
||||
eor r2, r3, r3, lsl #16
|
||||
eor r4, r5, r5, lsl #16
|
||||
eor r6, r7, r7, lsl #16
|
||||
eor r1, r1, r0, lsr #16
|
||||
eor r3, r3, r2, lsr #16
|
||||
eor r5, r5, r4, lsr #16
|
||||
eor r7, r7, r6, lsr #16
|
||||
eor r0, r0, r1, lsl #16
|
||||
eor r2, r2, r3, lsl #16
|
||||
eor r4, r4, r5, lsl #16
|
||||
eor r6, r6, r7, lsl #16
|
||||
stmia r8!, {r0-r7}
|
||||
bgt 2b
|
||||
/* Undo the bias: r7 = items left over (fewer than eight); return if none. */
adds r7, r10, #7
|
||||
ldmeqdb fp, {r4-r10, fp, sp, pc}
|
||||
/*
 * Finish the tail in the slow loop.  r0 was used as an EOR temporary, so
 * the step of 4 is reloaded, and r10 is cleared so that the first "2:"
 * returns once the slow loop is done.
 */
mov r0, #4
|
||||
mov r10, #0
|
||||
b 1b
|
||||
|
||||
/*
|
||||
* set multiple
|
||||
|
Loading…
Reference in New Issue
Block a user