diff --git a/sys/arch/sh5/conf/files.sh5 b/sys/arch/sh5/conf/files.sh5 index 242d72ff7b48..06225905ff2d 100644 --- a/sys/arch/sh5/conf/files.sh5 +++ b/sys/arch/sh5/conf/files.sh5 @@ -1,4 +1,4 @@ -# $NetBSD: files.sh5,v 1.9 2002/10/14 14:13:27 scw Exp $ +# $NetBSD: files.sh5,v 1.10 2002/10/19 09:01:44 scw Exp $ # @@ -146,8 +146,7 @@ file dev/cninit.c # the two architectures. file arch/sh3/sh3/disksubr.c -file arch/sh5/sh5/in_cksum.c inet -file netinet/in4_cksum.c inet +file arch/sh5/sh5/in_cksum.S inet file netns/ns_cksum.c ns defflag opt_sh5_debug.h SH5_SIM SH5_DEBUG_ST50 diff --git a/sys/arch/sh5/sh5/genassym.cf b/sys/arch/sh5/sh5/genassym.cf index d7efe725297f..3cb5f32d3127 100644 --- a/sys/arch/sh5/sh5/genassym.cf +++ b/sys/arch/sh5/sh5/genassym.cf @@ -1,4 +1,4 @@ -# $NetBSD: genassym.cf,v 1.11 2002/10/12 11:39:54 scw Exp $ +# $NetBSD: genassym.cf,v 1.12 2002/10/19 09:01:45 scw Exp $ # Copyright 2002 Wasabi Systems, Inc. # All rights reserved. @@ -38,8 +38,14 @@ include "opt_kernel_ipt.h" include include include +include include include +include +include +include +include +include include @@ -363,3 +369,12 @@ define SYS_exit SYS_exit define MR_START offsetof(struct mem_region, mr_start) define MR_SIZE offsetof(struct mem_region, mr_size) define SIZEOF_MEM_REGION sizeof(struct mem_region) + +# Constants required for in_cksum() and friends. +define M_LEN offsetof(struct mbuf, m_len) +define M_DATA offsetof(struct mbuf, m_data) +define M_NEXT offsetof(struct mbuf, m_next) +define IP_SRC offsetof(struct ip, ip_src) +define IP_DST offsetof(struct ip, ip_dst) +define IP6_SRC offsetof(struct ip6_hdr, ip6_src) +define IP6_DST offsetof(struct ip6_hdr, ip6_dst) diff --git a/sys/arch/sh5/sh5/in_cksum.S b/sys/arch/sh5/sh5/in_cksum.S new file mode 100644 index 000000000000..02931364736e --- /dev/null +++ b/sys/arch/sh5/sh5/in_cksum.S @@ -0,0 +1,352 @@ +/* $NetBSD: in_cksum.S,v 1.1 2002/10/19 09:01:45 scw Exp $ */ + +/* + * Copyright 2002 Wasabi Systems, Inc. 
+ * All rights reserved. + * + * Written by Steve C. Woodford for Wasabi Systems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the NetBSD Project by + * Wasabi Systems, Inc. + * 4. The name of Wasabi Systems, Inc. may not be used to endorse + * or promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * in_cksum() and in4_cksum() implementations for SH5. + * + * The SH5 does not have a carry flag, which complicates matters somewhat. 
+ * On the plus side, misaligned buffers are a piece of cake to deal with + * thanks to the ldlo.q and ldhi.q instructions. + */ + +#include "opt_inet.h" + +#include +#include "assym.h" + +/* + * Add With Carry two quads, "q1" and "q2". Put the result in "result". + * + * Trashes r0 and q1. + */ +#define ADDC(q1, q2, result) \ + add q2, q1, r0 ;\ + or q2, q1, q1 ;\ + cmpgtu q1, r0, q1 ;\ + add r0, q1, result + +/* + * Reduce the quad in "q" to a 32-bit sum, dealing with any + * resulting carry. Put the result in "result". + * + * Trashes r0 and q. + */ +#define REDUCE32(q, result) \ + mshflo.l q, r63, r0 ;\ + mshfhi.l q, r63, q ;\ + add r0, q, q ;\ + mshflo.l q, r63, r0 ;\ + mshfhi.l q, r63, q ;\ + add r0, q, result + +/* + * Reduce the 32-bit int in "i" to a 16-bit sum, dealing with any + * resulting carry. Put the result in "result". + * + * Trashes r0 and i. + */ +#define REDUCE16(i, result) \ + mshflo.w i, r63, r0 ;\ + shlri i, 16, i ;\ + add r0, i, i ;\ + mshflo.w i, r63, r0 ;\ + shlri i, 16, i ;\ + add r0, i, result + +/* + * Entry parameters: + * + * r3 Buffer length + * r4 Pointer to buffer + * r17 Must be set to 0x1f + * r18 Return address + * + * Returns: + * + * r7 Accumulated sum as two pairs of "carry:sum" words. + * + * Trashes: + * r0, r1, r3, r4, r19, r20, r21, r22 + * tr0, tr1, tr2 + */ + +Lcksumdata: + movi 0, r7 + ld.b r4, 0, r63 /* Pre-fetch the start of the buffer */ + ptabs/u r18, tr0 + pta/u Lend_game, tr1 + + /* + * We first have to quad-align the buffer. + * + * XXX: We may have to shift the result of the following "ldlo.q" + * depending on the buffer alignment, particularly for odd addresses, + * in the same way as we do for the "ldhi.q" in Lend_game. 
+ */ + xori r4, 0x7, r0 + andi r0, 0x7, r0 + addi r0, 1, r0 /* r0 == # bytes to next quad */ + bgtu/u r0, r3, tr1 /* Not enough bytes left to make it */ + ldlo.q r4, 0, r19 /* Fetch 1 to 4 words */ + add r4, r0, r4 /* r4 is now quad-aligned */ + sub r3, r0, r3 /* Update remaining length */ + ADDC (r19, r7, r7) /* Accumulate the words we just read */ + beq/u r3, r63, tr0 /* Return to caller if done */ + + /* + * Buffer is now quad-aligned. + * We now need to align it to a 32-byte boundary. + */ + and r4, r17, r1 + xor r1, r17, r0 + addi r0, 1, r0 /* r0 == # bytes to 32-byte boundary */ + bgtu/u r0, r3, tr1 /* Jump if not enough left to align */ + add r4, r0, r4 /* Update buffer pointer */ + sub r3, r0, r3 /* Update remaining bytes */ + shlri r1, 1, r1 /* Compute loop entry-point in order */ + addi r1, 17, r1 /* align buffer to 32-byte boundary */ + movi 0, r19 + movi 0, r20 + movi 0, r21 + pta/u Lbig_loop, tr2 + ptrel/l r1, tr1 + blink tr1, r63 /* Go for it. */ + + /* + * At this point (r1 below is its pre-shift value, r4 & 0x1f): + * + * r1 == 0x00 Enter loop at 1st load. + * r1 == 0x08 Enter loop at 2nd load. + * r1 == 0x10 Enter loop at 3rd load. + * r1 == 0x18 Enter loop at 4th load. + * + * r3 == # of bytes remaining, AFTER loop entry. + * r4 -> *next* 32-byte aligned chunk of buffer. + * + * The "big_loop" checksums 16 words at a time. + */ + +Lbig_loop: + addi r4, 32, r4 + addi r3, -32, r3 + ld.q r4, -32, r19 + ld.q r4, -24, r20 + ld.q r4, -16, r21 + ld.q r4, -8, r22 + ld.q r4, 0, r63 /* Pre-fetch next chunk */ + ADDC (r19, r7, r7) + ADDC (r20, r7, r7) + ADDC (r21, r7, r7) + ADDC (r22, r7, r7) + bgt/l r3, r17, tr2 + + /* + * There are less than 32-bytes left. 
+ */ +Lend_game: + beq/u r3, r63, tr0 /* Exit if all done */ + add r3, r4, r0 + andi r3, 0x18, r3 + add r4, r3, r4 + xor r3, r17, r3 + shlri r3, 1, r3 + addi r3, 6, r3 + movi 0, r19 + movi 0, r20 + movi 0, r21 + ptrel/l r3, tr1 + blink tr1, r63 + ld.q r4, -24, r19 + ld.q r4, -16, r20 + ld.q r4, -8, r21 + ldhi.q r0, -1, r22 /* The last quad needs special care */ + xori r0, 7, r0 /* to deal with mis-alignment, and */ + addi r0, 1, r0 /* to ensure we don't include any */ + andi r0, 7, r0 /* bytes past the end of the buffer */ + cmveq r0, r0, r22 + shlli r0, 3, r0 +#ifdef __LITTLE_ENDIAN__ + shlrd r22, r0, r22 +#else + shlld r22, r0, r22 +#endif + ADDC (r19, r7, r7) + ADDC (r20, r7, r7) + ADDC (r21, r7, r7) + ADDC (r22, r7, r7) + blink tr0, r63 + + +/* + * int in_cksum(struct mbuf *m, int len) + */ +ENTRY(in_cksum) +#ifndef _LP64 + add.l r2, r63, r5 +#else + add r2, r63, r5 +#endif + addz.l r3, r63, r6 + movi 0, r2 + pta/l Lcksum_entry, tr2 + + /* + * r2 == current sum + * r5 == m + * r6 == len + */ +Lcksum_top: + pta/u Lcksumdata, tr4 + pta/u Lcksum_loop, tr3 + movi 0, r7 + movi 0, r8 + movi 0x1f, r17 + add r18, r63, r23 + blink tr2, r63 + +Lcksum_loop: + ld.l r5, M_LEN, r3 + LDPTR r5, M_DATA, r4 + LDPTR r5, M_NEXT, r5 +Lcksum_entry4: + cmpgt r3, r6, r0 + cmvne r0, r6, r3 + sub r6, r3, r6 + xor r8, r4, r0 + add r8, r3, r8 + andi r0, 1, r0 + shlli r0, 3, r9 + blink tr4, r18 + REDUCE32(r7, r7) + shlld r7, r9, r7 + add r7, r2, r2 +Lcksum_entry: + pta/u 4f, tr0 + beq/u r6, r63, tr0 + bne/l r5, r63, tr3 + + + /* + * XXX: Do we need to take account of odd final r8? 
+ */ +4: REDUCE32(r2, r2) + REDUCE16(r2, r2) + ptabs/l r23, tr0 + xori r2, -1, r2 + shlli r2, 48, r2 + shlri r2, 48, r2 + blink tr0, r63 + + +#ifdef INET +/* + * int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len) + */ +ENTRY(in4_cksum) + addz.l r5, r63, r6 +#ifndef _LP64 + add.l r2, r63, r5 +#else + add r2, r63, r5 +#endif + shlli r3, 56, r3 + shlri r3, 56, r2 + addz.l r4, r63, r8 + + /* + * r2 == nxt (sum) + * r5 == m + * r6 == len + * r8 == off + */ + + /* + * First, deal with a pseudo header, if present + */ + pta/l Lno_pseudo, tr0 + beq/l r2, r63, tr0 /* Jump if no pseudo header */ + LDPTR r5, M_DATA, r4 + add r2, r6, r2 /* sum += len */ +#ifdef __LITTLE_ENDIAN__ + shlli r2, 8, r2 /* sum = htons(sum) */ +#endif + ldlo.q r4, IP_SRC, r19 /* Note: Assumes ip_src/ip_dst are */ + ldhi.q r4, IP_SRC+7, r20 /* contiguous in memory */ + or r19, r20, r19 + ADDC (r19, r2, r2) /* sum += ip->ip_{src,dst} */ +#endif /* INET */ + + +#if defined(INET) || defined(INET6) +Lno_pseudo: + pta/u Lskip_loop, tr0 + pta/l Lskip_entry, tr1 + pta/u Lskip_done, tr2 + blink tr1, r63 + +Lskip_loop: + ld.l r5, M_LEN, r3 + LDPTR r5, M_DATA, r4 + sub r8, r3, r8 /* off -= m->m_len */ + LDPTR r5, M_NEXT, r5 +Lskip_entry: + bgt/l r63, r8, tr2 /* Break loop if off < 0 */ + bne/l r5, r63, tr0 /* Go back until out of mbufs */ + + /* + * Well wha' d'ya know, wan out of widdle piggies... 
+ */ + pta/l Lout_of_mbufs, tr0 + blink tr0, r63 + +Lskip_done: + add r8, r4, r4 + xori r8, -1, r8 + add r3, r4, r4 + addi r8, 1, r3 + pta/l Lcksum_entry4, tr2 + pta/l Lcksum_top, tr0 + blink tr0, r63 + +Lout_of_mbufs: + LEA(Lmbuf_msg, r2) + pta/l _C_LABEL(printf), tr0 + blink tr0, r63 + +Lmbuf_msg: + .asciz "cksum: out of mbufs\n" +#endif /* INET || INET6 */ diff --git a/sys/arch/sh5/sh5/in_cksum.c b/sys/arch/sh5/sh5/in_cksum.c deleted file mode 100644 index ac642c8ac4a6..000000000000 --- a/sys/arch/sh5/sh5/in_cksum.c +++ /dev/null @@ -1,243 +0,0 @@ -/* $NetBSD: in_cksum.c,v 1.1 2002/07/05 13:32:05 scw Exp $ */ - -/* - * XXX: This MUST be replaced with an assembly version. SH-5's "Multi- - * Media" instruction set has some features which will make these - * checksum routines very fast indeed. - */ - -/* - * Copyright (c) 1988, 1992, 1993 - * The Regents of the University of California. All rights reserved. - * Copyright (c) 1996 - * Matt Thomas - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 - */ - -#include /* RCS ID & Copyright macro defns */ - -__KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.1 2002/07/05 13:32:05 scw Exp $"); - -#include -#include -#include -#include -#include -#include -#include - -/* - * Checksum routine for Internet Protocol family headers - * (Based on Portable Alpha version). - * - * This routine is very heavily used in the network - * code and should be modified for each CPU to be as fast as possible. - */ - -#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) -#define REDUCE32 \ - { \ - q_util.q = sum; \ - sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - } -#define REDUCE16 \ - { \ - q_util.q = sum; \ - l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - sum = l_util.s[0] + l_util.s[1]; \ - ADDCARRY(sum); \ - } - -static const u_int32_t in_masks[] = { - /*0 bytes*/ /*1 byte*/ /*2 bytes*/ /*3 bytes*/ - 0x00000000, 0x000000FF, 0x0000FFFF, 0x00FFFFFF, /* offset 0 */ - 0x00000000, 0x0000FF00, 0x00FFFF00, 0xFFFFFF00, /* offset 1 */ - 0x00000000, 0x00FF0000, 0xFFFF0000, 0xFFFF0000, /* offset 2 */ - 0x00000000, 0xFF000000, 0xFF000000, 0xFF000000, /* offset 3 */ -}; - -union l_util { - u_int16_t s[2]; - u_int32_t l; -}; -union q_util { - u_int16_t s[4]; - u_int32_t l[2]; - u_int64_t q; -}; - -static u_int64_t -in_cksumdata(register caddr_t buf, register int len) -{ - const u_int32_t *lw = (u_int32_t *) buf; - u_int64_t sum = 0; - u_int64_t prefilled; - int offset; - union q_util q_util; - - if ((3 & (uintptr_t) lw) == 0 && len == 20) { - sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4]; - REDUCE32; - return sum; - } - - if ((offset = 3 & (uintptr_t) lw) != 0) { - const u_int32_t *masks = in_masks + (offset << 2); - lw = (u_int32_t *) (((uintptr_t) lw) - offset); - sum = *lw++ & masks[len >= 3 ? 3 : len]; - len -= 4 - offset; - if (len <= 0) { - REDUCE32; - return sum; - } - } - - /* - * access prefilling to start load of next cache line. - * then add current cache line - * save result of prefilling for loop iteration. 
- */ - prefilled = lw[0]; - while ((len -= 32) >= 4) { - u_int64_t prefilling = lw[8]; - sum += prefilled + lw[1] + lw[2] + lw[3] - + lw[4] + lw[5] + lw[6] + lw[7]; - lw += 8; - prefilled = prefilling; - } - if (len >= 0) { - sum += prefilled + lw[1] + lw[2] + lw[3] - + lw[4] + lw[5] + lw[6] + lw[7]; - lw += 8; - } else { - len += 32; - } - while ((len -= 16) >= 0) { - sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3]; - lw += 4; - } - len += 16; - while ((len -= 4) >= 0) { - sum += (u_int64_t) *lw++; - } - len += 4; - if (len > 0) - sum += (u_int64_t) (in_masks[len] & *lw); - REDUCE32; - return sum; -} - -int -in_cksum(register struct mbuf *m, register int len) -{ - register u_int64_t sum = 0; - register int mlen = 0; - register int clen = 0; - register caddr_t addr; - union q_util q_util; - union l_util l_util; - - for (; m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - mlen = m->m_len; - if (len < mlen) - mlen = len; - addr = mtod(m, caddr_t); - if ((clen ^ (uintptr_t) addr) & 1) - sum += in_cksumdata(addr, mlen) << 8; - else - sum += in_cksumdata(addr, mlen); - - clen += mlen; - len -= mlen; - } - REDUCE16; - return (~sum & 0xffff); -} - -#if 0 -int -in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len) -{ - register u_int64_t sum = 0; - register int mlen = 0; - register int clen = 0; - register caddr_t addr; - union q_util q_util; - union l_util l_util; - struct ipovly ipov; - - if (nxt != 0) { - /* pseudo header */ - if (off < sizeof(struct ipovly)) - panic("in4_cksum: offset too short"); - if (m->m_len < sizeof(struct ip)) - panic("in4_cksum: bad mbuf chain"); - - memset(&ipov, 0, sizeof(ipov)); - - ipov.ih_len = htons(len); - ipov.ih_pr = nxt; - ipov.ih_src = mtod(m, struct ip *)->ip_src; - ipov.ih_dst = mtod(m, struct ip *)->ip_dst; - - sum += in_cksumdata((caddr_t) &ipov, sizeof(ipov)); - } - - /* skip over unnecessary part */ - while (m != NULL && off > 0) { - if (m->m_len > off) - break; - off -= m->m_len; - m = m->m_next; - } - - for (; 
m && len; m = m->m_next, off = 0) { - if (m->m_len == 0) - continue; - mlen = m->m_len - off; - if (len < mlen) - mlen = len; - addr = mtod(m, caddr_t) + off; - if ((clen ^ (u_int64_t) addr) & 1) - sum += in_cksumdata(addr, mlen) << 8; - else - sum += in_cksumdata(addr, mlen); - - clen += mlen; - len -= mlen; - } - REDUCE16; - return (~sum & 0xffff); -} -#endif