Fix the asm macros

The tmpx registers are now outputs, which makes them all unique.
Declare that the condition codes ("cc") are clobbered by the asm (not believed to be relied upon, but better to be correct).
Correctly specify w as both an input and an output register; I think this was hiding the bug below!
Allow sum's input and output to live in different registers.
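For illustration, a minimal stand-alone sketch of the constraint shape the macros now use.  This is a hypothetical helper, not code from the file: every register the asm writes (the pointer, the sum, the scratch) is declared as an output, the scratch is early-clobber so it stays unique, the pointer input is tied to its output with a matching "0" constraint, and "cc" is listed as clobbered.  (The pointer is early-clobbered here for extra safety, since its writeback happens before the sum input is read.)

#include <sys/types.h>

/* Hypothetical stand-alone analogue of the ADD4 macro, for illustration
 * only: load one 32-bit word, post-incrementing the pointer, and fold it
 * into the one's-complement sum with end-around carry. */
static __inline u_int
cksum_add4(const u_int **wp, u_int sum)
{
	const u_int *w = *wp;
	u_int tmp;

	__asm __volatile(
		"ldr	%2,[%0],#4	\n"	/* tmp = *w++ (writeback updates %0) */
		"adds	%1,%4,%2	\n"	/* sum(out) = sum(in) + tmp; sets C */
		"adcs	%1,%1,#0"		/* fold the carry flag back in */
		: "=&r" (w), "=r" (sum), "=&r" (tmp)	/* everything written is an output */
		: "0" (w), "r" (sum)			/* "0" ties w's input to output %0 */
		: "cc");				/* adds/adcs change the flags */
	*wp = w;
	return (sum);
}

With the old-style constraints ("=r" (sum) as the only output, with w and the temporaries as plain "r" inputs), GCC was free to assume those registers still held their original values after the asm, which is exactly what bit in4_cksum below.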

Correct a bug in the pseudo-header handling in in4_cksum.  The new macros turned up a latent bug there: the code was advancing a pointer forward 16 bytes twice.  This went unnoticed before because the old ADD16 macro wasn't 100% accurate: it modified w without declaring it as an output.
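To make the latent bug concrete, here is a hedged pure-C model of the pseudo-header pass.  The helper names are hypothetical; the real code uses the ADD16/ADD4 asm macros over struct ipovly.  Once the macro itself advances w (via the ldmia writeback, now visible through the output operand), the old explicit advance steps the pointer twice:

#include <sys/types.h>

/* Hypothetical pure-C model of the fixed pseudo-header pass; add_words()
 * stands in for the ADD16/ADD4 macros and, like them, advances the pointer
 * it loads through (the ldmia/ldr "!" writeback). */
static u_int
add_words(const u_char **wp, u_int sum, int nbytes)
{
	const u_int *p = (const u_int *)*wp;
	int i;

	for (i = 0; i < nbytes / 4; i++) {
		sum += p[i];
		if (sum < p[i])		/* emulate adcs: end-around carry */
			sum++;
	}
	*wp += nbytes;			/* the macros' post-increment writeback */
	return (sum);
}

static u_int
pseudo_hdr_sum(const u_char *ipov, u_int sum)	/* 20-byte pseudo header */
{
	const u_char *w = ipov;

	sum = add_words(&w, sum, 16);	/* ADD16: sums 16 bytes, leaves w at byte 16 */
	/* w += 16; */			/* the removed line: w is already advanced,
					 * so this would make the next read start at
					 * byte 32 and checksum whatever lies past
					 * the 20-byte header */
	sum = add_words(&w, sum, 4);	/* ADD4: the final 4 bytes (16..19) */
	return (sum);
}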
chris 2001-12-08 21:18:50 +00:00
parent b1554100e7
commit ce689bde4d
1 changed file with 51 additions and 47 deletions

in_cksum_arm.c

@@ -1,4 +1,4 @@
-/*	$NetBSD: in_cksum_arm.c,v 1.2 2001/05/23 19:33:48 chris Exp $	*/
+/*	$NetBSD: in_cksum_arm.c,v 1.3 2001/12/08 21:18:50 chris Exp $	*/
 
 /*
  * ARM version:
@@ -67,54 +67,59 @@
  */
 
 #define ADD64	__asm __volatile("				\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adds	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adcs	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adcs	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adcs	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	adcs	%0,%0,#0\n"					\
-	: "=r" (sum)						\
-	: "0" (sum), "r" (w), "r" (tmp1), "r" (tmp2), "r" (tmp3), "r" (tmp4))
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adds	%1,%7,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adcs	%1,%1,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adcs	%1,%1,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adcs	%1,%1,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	adcs	%1,%1,#0\n"					\
+	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+	: "0" (w), "r" (sum)					\
+	: "cc")
 
 #define ADD32	__asm __volatile("				\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adds	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adcs	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	adcs	%0,%0,#0\n"					\
-	: "=r" (sum)						\
-	: "0" (sum), "r" (w), "r" (tmp1), "r" (tmp2), "r" (tmp3), "r" (tmp4))
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adds	%1,%7,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adcs	%1,%1,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	adcs	%1,%1,#0\n"					\
+	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+	: "0" (w), "r" (sum)					\
+	: "cc")
 
 #define ADD16	__asm __volatile("				\n\
-	ldmia	%2!, {%3, %4, %5, %6}				\n\
-	adds	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,%5; adcs	%0,%0,%6			\n\
-	adcs	%0,%0,#0\n"					\
-	: "=r" (sum)						\
-	: "0" (sum), "r" (w), "r" (tmp1), "r" (tmp2), "r" (tmp3), "r" (tmp4))
+	ldmia	%0!, {%2, %3, %4, %5}				\n\
+	adds	%1,%7,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,%4; adcs	%1,%1,%5			\n\
+	adcs	%1,%1,#0\n"					\
+	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+	: "0" (w), "r" (sum)					\
+	: "cc")
 
 #define ADD8	__asm __volatile("				\n\
-	ldmia	%2!, {%3, %4}					\n\
-	adds	%0,%0,%3; adcs	%0,%0,%4			\n\
-	adcs	%0,%0,#0\n"					\
-	: "=r" (sum)						\
-	: "0" (sum), "r" (w), "r" (tmp1), "r" (tmp2))
+	ldmia	%0!, {%2, %3}					\n\
+	adds	%1,%5,%2; adcs	%1,%1,%3			\n\
+	adcs	%1,%1,#0\n"					\
+	: "=r" (w), "=r" (sum), "=&r" (tmp1), "=&r" (tmp2)	\
+	: "0" (w), "r" (sum)					\
+	: "cc" )
 
 #define ADD4	__asm __volatile("				\n\
-	ldr	%3,[%2],#4					\n\
-	adds	%0,%0,%3					\n\
-	adcs	%0,%0,#0\n"					\
-	: "=r" (sum)						\
-	: "0" (sum), "r" (w), "r" (tmp1))
+	ldr	%2,[%0],#4					\n\
+	adds	%1,%4,%2					\n\
+	adcs	%1,%1,#0\n"					\
+	: "=r" (w), "=r" (sum), "=&r" (tmp1)			\
+	: "0" (w), "r" (sum)					\
+	: "cc")
 
 /*#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}*/
 #define REDUCE	__asm __volatile("				\n\
@@ -144,7 +149,7 @@ in_cksum_internal(struct mbuf *m, int off, int len, u_int sum)
 	 * allow the compiler to pick which specific machine registers to
 	 * use, instead of hard-coding this in the asm code above.
 	 */
-	u_int tmp1, tmp2, tmp3, tmp4;
+	register u_int tmp1, tmp2, tmp3, tmp4;
 
 	for (; m && len; m = m->m_next) {
 		if (m->m_len == 0)
@@ -235,10 +240,10 @@ in4_cksum(m, nxt, off, len)
 	int off, len;
 {
 	u_int sum = 0;
+	/* for ADD macros */
+	u_int tmp1, tmp2, tmp3, tmp4;
 
 	if (nxt != 0) {
-		/* for ADD macros */
-		register u_int tmp1, tmp2, tmp3, tmp4;
 		u_char *w;
 		struct ipovly ipov;
 		/* pseudo header */
@@ -256,7 +261,6 @@ in4_cksum(m, nxt, off, len)
 		/* assumes sizeof(ipov) == 20 */
 		ADD16;
-		w += 16;
 		ADD4;
 	}
 
 	/* skip unnecessary part */