diff --git a/sys/dev/tc/if_le_ioasic.c b/sys/dev/tc/if_le_ioasic.c
index f90ee89c4105..f40fe8746e8a 100644
--- a/sys/dev/tc/if_le_ioasic.c
+++ b/sys/dev/tc/if_le_ioasic.c
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_le_ioasic.c,v 1.7 1997/07/22 03:44:30 jonathan Exp $	*/
+/*	$NetBSD: if_le_ioasic.c,v 1.8 1997/08/26 01:27:12 jonathan Exp $	*/
 
 /*
  * Copyright (c) 1996 Carnegie-Mellon University.
@@ -204,19 +204,79 @@ le_ioasic_copytobuf_gap16(sc, fromv, boff, len)
 	volatile caddr_t buf = sc->sc_mem;
 	register caddr_t from = fromv;
 	register caddr_t bptr;
-	register int xfer;
 
 	bptr = buf + ((boff << 1) & ~0x1f);
 	boff &= 0xf;
-	xfer = min(len, 16 - boff);
-	while (len > 0) {
+
+	/*
+	 * Dispose of boff so destination of subsequent copies is
+	 * 16-byte aligned.
+	 */
+	if (boff) {
+		register int xfer;
+		xfer = min(len, 16 - boff);
 		bcopy(from, bptr + boff, xfer);
 		from += xfer;
 		bptr += 32;
-		boff = 0;
 		len -= xfer;
-		xfer = min(len, 16);
 	}
+
+	/* Destination of copies is now 16-byte aligned. */
+	if (len >= 16)
+		switch ((u_long)from & (sizeof(u_int32_t) -1)) {
+		case 2:
+			/* Ethernet headers make this the dominant case. */
+			do {
+				register u_int32_t *dst = (u_int32_t*)bptr;
+				register u_int16_t t0;
+				register u_int32_t t1, t2, t3, t4;
+
+				/* read from odd-16-bit-aligned, cached src */
+				t0 = *(u_int16_t*)from;
+				t1 = *(u_int32_t*)(from+2);
+				t2 = *(u_int32_t*)(from+6);
+				t3 = *(u_int32_t*)(from+10);
+				t4 = *(u_int16_t*)(from+14);
+
+				/* DMA buffer is uncached on mips */
+				dst[0] = t0 | (t1 << 16);
+				dst[1] = (t1 >> 16) | (t2 << 16);
+				dst[2] = (t2 >> 16) | (t3 << 16);
+				dst[3] = (t3 >> 16) | (t4 << 16);
+
+				from += 16;
+				bptr += 32;
+				len -= 16;
+			} while (len >= 16);
+			break;
+
+		case 0:
+			do {
+				register u_int32_t *src = (u_int32_t*)from;
+				register u_int32_t *dst = (u_int32_t*)bptr;
+				register u_int32_t t0, t1, t2, t3;
+
+				t0 = src[0]; t1 = src[1]; t2 = src[2]; t3 = src[3];
+				dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
+
+				from += 16;
+				bptr += 32;
+				len -= 16;
+			} while (len >= 16);
+			break;
+
+		default:
+			/* Does odd-aligned case ever happen? */
+			do {
+				bcopy(from, bptr, 16);
+				from += 16;
+				bptr += 32;
+				len -= 16;
+			} while (len >= 16);
+			break;
+		}
+	if (len)
+		bcopy(from, bptr, len);
 }
 
 void
@@ -228,19 +288,71 @@ le_ioasic_copyfrombuf_gap16(sc, tov, boff, len)
 	volatile caddr_t buf = sc->sc_mem;
 	register caddr_t to = tov;
 	register caddr_t bptr;
-	register int xfer;
 
 	bptr = buf + ((boff << 1) & ~0x1f);
 	boff &= 0xf;
-	xfer = min(len, 16 - boff);
-	while (len > 0) {
-		bcopy(bptr + boff, to, xfer);
+
+	/* Dispose of boff. source of copy is subsequently 16-byte aligned. */
+	if (boff) {
+		register int xfer;
+		xfer = min(len, 16 - boff);
+		bcopy(bptr+boff, to, xfer);
 		to += xfer;
 		bptr += 32;
-		boff = 0;
 		len -= xfer;
-		xfer = min(len, 16);
 	}
+	if (len >= 16)
+		switch ((u_long)to & (sizeof(u_int32_t) -1)) {
+		case 2:
+			/*
+			 * to is aligned to an odd 16-bit boundary.  Ethernet headers
+			 * make this the dominant case (98% or more).
+			 */
+			do {
+				register u_int32_t *src = (u_int32_t*)bptr;
+				register u_int32_t t0, t1, t2, t3;
+
+				/* read from uncached aligned DMA buf */
+				t0 = src[0]; t1 = src[1]; t2 = src[2]; t3 = src[3];
+
+				/* write to odd-16-bit-word aligned dst */
+				*(u_int16_t *) (to+0) = (u_short) t0;
+				*(u_int32_t *) (to+2) = (t0 >> 16) | (t1 << 16);
+				*(u_int32_t *) (to+6) = (t1 >> 16) | (t2 << 16);
+				*(u_int32_t *) (to+10) = (t2 >> 16) | (t3 << 16);
+				*(u_int16_t *) (to+14) = (t3 >> 16);
+				bptr += 32;
+				to += 16;
+				len -= 16;
+			} while (len > 16);
+			break;
+		case 0:
+			/* 32-bit aligned aligned copy. Rare. */
+			do {
+				register u_int32_t *src = (u_int32_t*)bptr;
+				register u_int32_t *dst = (u_int32_t*)to;
+				register u_int32_t t0, t1, t2, t3;
+
+				t0 = src[0]; t1 = src[1]; t2 = src[2]; t3 = src[3];
+				dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
+				to += 16;
+				bptr += 32;
+				len -= 16;
+			} while (len > 16);
+			break;
+
+		/* XXX Does odd-byte-aligned case ever happen? */
+		default:
+			do {
+				bcopy(bptr, to, 16);
+				to += 16;
+				bptr += 32;
+				len -= 16;
+			} while (len > 16);
+			break;
+		}
+	if (len)
+		bcopy(bptr, to, len);
 }
 
 void
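
Note on the "case 2" paths: the gap16 DMA buffer holds 16 bytes of packet data in each 32-byte chunk, which is why bptr advances by 32 while the packet pointer advances by 16, and the unrolled loops assume a little-endian host (the pmax MIPS machines this driver serves), so that a 16-bit head load followed by shifted 32-bit merges reproduces exactly the bytes a plain bcopy would. Below is a minimal host-side sketch of that equivalence for the copy-to-buffer direction; it is illustrative only and not part of the commit, the name check_case2_merge is hypothetical, and it uses memcpy loads and <stdint.h> types so it runs anywhere, whereas the driver casts pointers directly (safe there because mbuf data is at least 16-bit aligned).

#include <assert.h>
#include <stdint.h>
#include <string.h>

/*
 * Hypothetical illustration: mirrors the "case 2" merge in
 * le_ioasic_copytobuf_gap16() and checks it against a straight byte copy.
 * The equivalence holds on little-endian hosts, as on the pmax.
 */
static void
check_case2_merge(const uint8_t *from, uint32_t dst[4])
{
	uint16_t t0;
	uint32_t t1, t2, t3, t4;

	/* Same loads as the driver: a 16-bit head, then 32-bit words. */
	memcpy(&t0, from, 2);
	memcpy(&t1, from + 2, 4);
	memcpy(&t2, from + 6, 4);
	memcpy(&t3, from + 10, 4);
	t4 = 0;
	memcpy(&t4, from + 14, 2);

	/* Same merges the driver writes into the uncached DMA buffer. */
	dst[0] = t0 | (t1 << 16);
	dst[1] = (t1 >> 16) | (t2 << 16);
	dst[2] = (t2 >> 16) | (t3 << 16);
	dst[3] = (t3 >> 16) | (t4 << 16);

	/* On a little-endian host the result is byte-identical to bcopy. */
	assert(memcmp(dst, from, 16) == 0);
}

int
main(void)
{
	uint8_t src[16];
	uint32_t dst[4];
	int i;

	for (i = 0; i < 16; i++)
		src[i] = (uint8_t)(i + 1);
	check_case2_merge(src, dst);
	return 0;
}

The copy-from-buffer direction in the patch is the same transformation in reverse: aligned 32-bit reads from the uncached buffer, then a 16-bit store followed by shifted 32-bit stores to the odd-16-bit-aligned destination.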