Add fast assembler versions of in_cksum() and in4_cksum().
(Well, as fast as can be expected on a cpu with no carry flag)
This commit is contained in:
parent
80b7fbca17
commit
873eee59ff
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: files.sh5,v 1.9 2002/10/14 14:13:27 scw Exp $
|
||||
# $NetBSD: files.sh5,v 1.10 2002/10/19 09:01:44 scw Exp $
|
||||
|
||||
|
||||
#
|
||||
|
@ -146,8 +146,7 @@ file dev/cninit.c
|
|||
# the two architectures.
|
||||
file arch/sh3/sh3/disksubr.c
|
||||
|
||||
file arch/sh5/sh5/in_cksum.c inet
|
||||
file netinet/in4_cksum.c inet
|
||||
file arch/sh5/sh5/in_cksum.S inet
|
||||
file netns/ns_cksum.c ns
|
||||
|
||||
defflag opt_sh5_debug.h SH5_SIM SH5_DEBUG_ST50
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: genassym.cf,v 1.11 2002/10/12 11:39:54 scw Exp $
|
||||
# $NetBSD: genassym.cf,v 1.12 2002/10/19 09:01:45 scw Exp $
|
||||
|
||||
# Copyright 2002 Wasabi Systems, Inc.
|
||||
# All rights reserved.
|
||||
|
@ -38,8 +38,14 @@ include "opt_kernel_ipt.h"
|
|||
include <sys/param.h>
|
||||
include <sys/types.h>
|
||||
include <sys/proc.h>
|
||||
include <sys/mbuf.h>
|
||||
include <sys/signal.h>
|
||||
include <sys/syscall.h>
|
||||
include <netinet/in.h>
|
||||
include <netinet/in_systm.h>
|
||||
include <netinet/ip.h>
|
||||
include <netinet/ip6.h>
|
||||
include <netinet/ip_var.h>
|
||||
|
||||
include <uvm/uvm_extern.h>
|
||||
|
||||
|
@ -363,3 +369,12 @@ define SYS_exit SYS_exit
|
|||
define MR_START offsetof(struct mem_region, mr_start)
|
||||
define MR_SIZE offsetof(struct mem_region, mr_size)
|
||||
define SIZEOF_MEM_REGION sizeof(struct mem_region)
|
||||
|
||||
# Constants required for in_cksum() and friends.
|
||||
define M_LEN offsetof(struct mbuf, m_len)
|
||||
define M_DATA offsetof(struct mbuf, m_data)
|
||||
define M_NEXT offsetof(struct mbuf, m_next)
|
||||
define IP_SRC offsetof(struct ip, ip_src)
|
||||
define IP_DST offsetof(struct ip, ip_dst)
|
||||
define IP6_SRC offsetof(struct ip6_hdr, ip6_src)
|
||||
define IP6_DST offsetof(struct ip6_hdr, ip6_dst)
|
||||
|
|
|
@ -0,0 +1,352 @@
|
|||
/* $NetBSD: in_cksum.S,v 1.1 2002/10/19 09:01:45 scw Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright 2002 Wasabi Systems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Written by Steve C. Woodford for Wasabi Systems, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed for the NetBSD Project by
|
||||
* Wasabi Systems, Inc.
|
||||
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
|
||||
* or promote products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* in_cksum() and in4_cksum() implementations for SH5.
|
||||
*
|
||||
* The SH5 does not have a carry flag, which complicates matters somewhat.
|
||||
* On the plus side, misaligned buffers are a piece of cake to deal with
|
||||
* thanks to the ldlo.q and ldhi.q instructions.
|
||||
*/
|
||||
|
||||
#include "opt_inet.h"
|
||||
|
||||
#include <machine/asm.h>
|
||||
#include "assym.h"
|
||||
|
||||
/*
|
||||
* Add With Carry two quads, "q1" and "q2". Put the result in "result".
|
||||
*
|
||||
* Trashes r0 and q1.
|
||||
*/
|
||||
/*
 * ADDC: 64-bit add with end-around carry, done without a carry flag.
 * r0 receives the (possibly wrapped) sum q1 + q2 and q1 is overwritten
 * with q1 | q2.  The cmpgtu result is then added back into the sum.
 * NOTE(review): presumably cmpgtu yields 1 when (q1 | q2) > r0 unsigned,
 * which can only happen when the add wrapped -- confirm against the
 * SHmedia manual.
 */
#define ADDC(q1, q2, result) \
|
||||
add q2, q1, r0 ;\
|
||||
or q2, q1, q1 ;\
|
||||
cmpgtu q1, r0, q1 ;\
|
||||
add r0, q1, result
|
||||
|
||||
/*
|
||||
* Reduce the quad in "q" to a 32-bit sum, dealing with any
|
||||
* resulting carry. Put the result in "result".
|
||||
*
|
||||
* Trashes r0 and q.
|
||||
*/
|
||||
/*
 * REDUCE32: the same three-instruction fold is applied twice; the second
 * pass absorbs any carry produced by the first add.
 * NOTE(review): presumably r63 reads as zero, so mshflo.l/mshfhi.l leave
 * the low and high 32-bit halves of q zero-extended in r0 and q --
 * confirm against the SHmedia manual.
 */
#define REDUCE32(q, result) \
|
||||
mshflo.l q, r63, r0 ;\
|
||||
mshfhi.l q, r63, q ;\
|
||||
add r0, q, q ;\
|
||||
mshflo.l q, r63, r0 ;\
|
||||
mshfhi.l q, r63, q ;\
|
||||
add r0, q, result
|
||||
|
||||
/*
|
||||
* Reduce the 32-bit int in "i" to a 16-bit sum, dealing with any
|
||||
* resulting carry. Put the result in "result".
|
||||
*
|
||||
* Trashes r0 and i.
|
||||
*/
|
||||
/*
 * REDUCE16: the same fold (mshflo.w into r0, shift i right by 16, add) is
 * applied twice; the second pass absorbs any carry from the first add.
 * NOTE(review): the result may carry stray bits above bit 15; the visible
 * caller (in_cksum) masks to 16 bits afterwards with a shlli/shlri pair --
 * confirm before using this macro elsewhere.
 */
#define REDUCE16(i, result) \
|
||||
mshflo.w i, r63, r0 ;\
|
||||
shlri i, 16, i ;\
|
||||
add r0, i, i ;\
|
||||
mshflo.w i, r63, r0 ;\
|
||||
shlri i, 16, i ;\
|
||||
add r0, i, result
|
||||
|
||||
/*
|
||||
* Entry parameters:
|
||||
*
|
||||
* r3 Buffer length
|
||||
* r4 Pointer to buffer
|
||||
* r17 Must be set to 0x1f
|
||||
* r18 Return address
|
||||
*
|
||||
* Returns:
|
||||
*
|
||||
* r7 Accumulated sum as two pairs of "carry:sum" words.
|
||||
*
|
||||
* Trashes:
|
||||
* r0, r1, r3, r4, r19, r20, r21, r22
|
||||
* tr0, tr1, tr2
|
||||
*/
|
||||
|
||||
/*
 * Lcksumdata: sum one contiguous buffer (pointer in r4, length in r3)
 * into r7.  This first part clears the sum, prepares tr0 (return to the
 * caller via r18) and tr1 (short-buffer exit to Lend_game), consumes the
 * bytes up to the next quad boundary, and then computes an entry point
 * from the buffer's offset within its 32-byte chunk.
 */
Lcksumdata:
|
||||
movi 0, r7 /* Start with an empty sum */
|
||||
ld.b r4, 0, r63 /* Pre-fetch the start of the buffer */
|
||||
ptabs/u r18, tr0 /* tr0 = return address passed in r18 */
|
||||
pta/u Lend_game, tr1 /* tr1 = exit used when the buffer is too short */
|
||||
|
||||
/*
|
||||
* We first have to quad-align the buffer.
|
||||
*
|
||||
* XXX: We may have to shift the result of the following "ldlo.q"
|
||||
* depending on the buffer alignment, particularly for odd addresses,
|
||||
* in the same way as we do for the "ldhi.q" in Lend_game.
|
||||
*/
|
||||
xori r4, 0x7, r0
|
||||
andi r0, 0x7, r0
|
||||
addi r0, 1, r0 /* r0 == # bytes to next quad */
|
||||
bgtu/u r0, r3, tr1 /* Not enough bytes left to make it */
|
||||
ldlo.q r4, 0, r19 /* Fetch 1 to 4 words */
|
||||
add r4, r0, r4 /* r4 is now quad-aligned */
|
||||
sub r3, r0, r3 /* Update remaining length */
|
||||
ADDC (r19, r7, r7) /* Accumulate the words we just read */
|
||||
beq/u r3, r63, tr0 /* Return to caller if done */
|
||||
|
||||
/*
|
||||
* Buffer is now quad-aligned.
|
||||
|
||||
* We now need to align it to a 32-byte boundary.
|
||||
*/
|
||||
and r4, r17, r1 /* r1 = offset of r4 within its 32-byte chunk (r17 == 0x1f) */
|
||||
xor r1, r17, r0
|
||||
addi r0, 1, r0 /* r0 == # bytes to 32-byte boundary */
|
||||
bgtu/u r0, r3, tr1 /* Jump if not enough left to align */
|
||||
add r4, r0, r4 /* Update buffer pointer */
|
||||
sub r3, r0, r3 /* Update remaining bytes */
|
||||
shlri r1, 1, r1 /* Compute loop entry-point in order */
|
||||
addi r1, 17, r1 /* align buffer to 32-byte boundary */
|
||||
/* NOTE(review): r19-r21 are presumably zeroed so that any load skipped by the computed entry adds nothing -- confirm */
movi 0, r19
|
||||
movi 0, r20
|
||||
movi 0, r21
|
||||
pta/u Lbig_loop, tr2 /* tr2 = Lbig_loop */
|
||||
ptrel/l r1, tr1 /* tr1 = computed (relative) entry point */
|
||||
blink tr1, r63 /* Go for it. */
|
||||
|
||||
/*
|
||||
* At this point:
|
||||
*
|
||||
* r0 == 0x00 Enter loop at 1st load.
|
||||
* r0 == 0x08 Enter loop at 2nd load.
|
||||
* r0 == 0x10 Enter loop at 3rd load.
|
||||
* r0 == 0x18 Enter loop at 4th load.
|
||||
*
|
||||
* r3 == # of bytes remaining, AFTER loop entry.
|
||||
* r4 -> *next* 32-byte aligned chunk of buffer.
|
||||
*
|
||||
* The "big_loop" checksums 16 words at a time.
|
||||
*/
|
||||
|
||||
/*
 * Lbig_loop: each pass advances r4 by 32 and reduces r3 by 32, loads the
 * four quads just stepped over into r19-r22, touches the next chunk, and
 * folds the four quads into r7 with ADDC.  The final branch repeats the
 * loop while r3 is greater than r17.
 */
Lbig_loop:
|
||||
addi r4, 32, r4 /* Step past this chunk; the loads below use negative offsets */
|
||||
addi r3, -32, r3 /* 32 fewer bytes remain */
|
||||
ld.q r4, -32, r19
|
||||
ld.q r4, -24, r20
|
||||
ld.q r4, -16, r21
|
||||
ld.q r4, -8, r22
|
||||
ld.q r4, 0, r63 /* Pre-fetch next chunk */
|
||||
ADDC (r19, r7, r7)
|
||||
ADDC (r20, r7, r7)
|
||||
ADDC (r21, r7, r7)
|
||||
ADDC (r22, r7, r7)
|
||||
bgt/l r3, r17, tr2 /* Branch via tr2 while r3 > r17 */
|
||||
|
||||
/*
|
||||
* There are fewer than 32 bytes left.
|
||||
*/
|
||||
/*
 * Lend_game: handle the tail of the buffer.  Returns through tr0 at once
 * if nothing remains; otherwise branches to a computed entry point, and
 * the ldhi.q below picks up the final (possibly partial) quad.  The
 * result is accumulated into r7 before returning through tr0.
 */
Lend_game:
|
||||
beq/u r3, r63, tr0 /* Exit if all done */
|
||||
add r3, r4, r0 /* r0 = address one past the end of the buffer */
|
||||
andi r3, 0x18, r3 /* r3 = remaining length masked to a multiple of 8 */
|
||||
add r4, r3, r4 /* r4 -> just past the whole quads */
|
||||
xor r3, r17, r3
|
||||
shlri r3, 1, r3
|
||||
addi r3, 6, r3 /* NOTE(review): presumably a relative branch offset that skips the unneeded ld.q's -- confirm */
|
||||
movi 0, r19
|
||||
movi 0, r20
|
||||
movi 0, r21
|
||||
ptrel/l r3, tr1
|
||||
blink tr1, r63
|
||||
ld.q r4, -24, r19
|
||||
ld.q r4, -16, r20
|
||||
ld.q r4, -8, r21
|
||||
ldhi.q r0, -1, r22 /* The last quad needs special care */
|
||||
xori r0, 7, r0 /* to deal with mis-alignment, and */
|
||||
addi r0, 1, r0 /* to ensure we don't include any */
|
||||
andi r0, 7, r0 /* bytes past the end of the buffer */
|
||||
/* NOTE(review): presumably clears r22 when r0 == 0, i.e. when the buffer ends on a quad boundary -- confirm cmveq semantics */
cmveq r0, r0, r22
|
||||
shlli r0, 3, r0 /* Convert the byte count in r0 to a bit count */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
shlrd r22, r0, r22
|
||||
#else
|
||||
shlld r22, r0, r22
|
||||
#endif
|
||||
ADDC (r19, r7, r7)
|
||||
ADDC (r20, r7, r7)
|
||||
ADDC (r21, r7, r7)
|
||||
ADDC (r22, r7, r7)
|
||||
blink tr0, r63 /* Return to the caller */
|
||||
|
||||
|
||||
/*
|
||||
* int in_cksum(struct mbuf *m, int len)
|
||||
*/
|
||||
/*
 * Entry point: copies the two arguments out of r2 (m) and r3 (len) into
 * r5 and r6, clears the running sum in r2, and enters the shared mbuf
 * loop at Lcksum_entry via Lcksum_top.
 */
ENTRY(in_cksum)
|
||||
#ifndef _LP64
|
||||
add.l r2, r63, r5
|
||||
#else
|
||||
add r2, r63, r5
|
||||
#endif
|
||||
addz.l r3, r63, r6
|
||||
movi 0, r2
|
||||
pta/l Lcksum_entry, tr2 /* tr2 = where Lcksum_top should continue */
|
||||
|
||||
/*
|
||||
* r2 == current sum
|
||||
* r5 == m
|
||||
* r6 == len
|
||||
*/
|
||||
/* Common setup; also reached from Lskip_done with tr2 = Lcksum_entry4 */
Lcksum_top:
|
||||
pta/u Lcksumdata, tr4 /* tr4 = per-buffer summing routine */
|
||||
pta/u Lcksum_loop, tr3 /* tr3 = top of the mbuf loop */
|
||||
movi 0, r7
|
||||
movi 0, r8 /* r8 = running count of bytes summed so far */
|
||||
movi 0x1f, r17 /* Lcksumdata requires r17 == 0x1f */
|
||||
add r18, r63, r23 /* Save our return address; r18 is reused to call Lcksumdata */
|
||||
blink tr2, r63
|
||||
|
||||
/*
 * Lcksum_loop: walk the mbuf chain.  For each mbuf, load its length, data
 * pointer and next pointer, limit the length to the bytes still wanted,
 * call Lcksumdata, reduce its result to 32 bits, shift it by r9 and add
 * it to the running sum in r2.  When len reaches zero or the chain ends,
 * fold r2, complement it, and return its low 16 bits.
 */
Lcksum_loop:
|
||||
ld.l r5, M_LEN, r3
|
||||
LDPTR r5, M_DATA, r4
|
||||
LDPTR r5, M_NEXT, r5
|
||||
/* Alternate entry used by Lskip_done, with r3/r4/r5 already set up */
Lcksum_entry4:
|
||||
cmpgt r3, r6, r0 /* r0 = (this buffer's length > remaining len) */
|
||||
cmvne r0, r6, r3 /* NOTE(review): presumably r3 = r6 when r0 != 0, clamping to len -- confirm */
|
||||
sub r6, r3, r6 /* len -= bytes taken from this mbuf */
|
||||
xor r8, r4, r0
|
||||
add r8, r3, r8 /* Running byte count += bytes taken */
|
||||
andi r0, 1, r0 /* r0 = low bit of (bytes so far ^ buffer address) */
|
||||
shlli r0, 3, r9 /* r9 = 8 when that bit is set, else 0 */
|
||||
blink tr4, r18 /* Call Lcksumdata; return address goes in r18 */
|
||||
REDUCE32(r7, r7)
|
||||
shlld r7, r9, r7 /* Shift the partial sum left by r9 bits */
|
||||
add r7, r2, r2
|
||||
Lcksum_entry:
|
||||
pta/u 4f, tr0
|
||||
beq/u r6, r63, tr0 /* Done once len reaches zero */
|
||||
bne/l r5, r63, tr3 /* Otherwise continue while there is another mbuf */
|
||||
|
||||
|
||||
/*
|
||||
* XXX: Do we need to take account of odd final r8?
|
||||
*/
|
||||
4: REDUCE32(r2, r2)
|
||||
REDUCE16(r2, r2)
|
||||
ptabs/l r23, tr0 /* Return address saved in r23 by Lcksum_top */
|
||||
xori r2, -1, r2 /* Complement the sum */
|
||||
shlli r2, 48, r2
|
||||
shlri r2, 48, r2 /* Keep only the low 16 bits */
|
||||
blink tr0, r63
|
||||
|
||||
|
||||
#ifdef INET
|
||||
/*
|
||||
* int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
|
||||
*/
|
||||
/*
 * Entry point: moves the arguments into the registers listed in the
 * comment below (nxt is narrowed to 8 bits by the shift pair), then, when
 * nxt is non-zero, seeds the sum in r2 with the pseudo-header fields.
 * Execution then continues into the code that follows this block.
 */
ENTRY(in4_cksum)
|
||||
addz.l r5, r63, r6
|
||||
#ifndef _LP64
|
||||
add.l r2, r63, r5
|
||||
#else
|
||||
add r2, r63, r5
|
||||
#endif
|
||||
shlli r3, 56, r3
|
||||
shlri r3, 56, r2 /* r2 = low 8 bits of nxt */
|
||||
addz.l r4, r63, r8
|
||||
|
||||
/*
|
||||
* r2 == nxt (sum)
|
||||
* r5 == m
|
||||
* r6 == len
|
||||
* r8 == off
|
||||
*/
|
||||
|
||||
/*
|
||||
* First, deal with a pseudo header, if present
|
||||
*/
|
||||
pta/l Lno_pseudo, tr0
|
||||
beq/l r2, r63, tr0 /* Jump if no pseudo header */
|
||||
LDPTR r5, M_DATA, r4 /* r4 = data pointer of the first mbuf */
|
||||
add r2, r6, r2 /* sum += len */
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
shlli r2, 8, r2 /* sum = htons(sum) */
|
||||
#endif
|
||||
ldlo.q r4, IP_SRC, r19 /* Note: Assumes ip_src/ip_dst are */
|
||||
ldhi.q r4, IP_SRC+7, r20 /* contiguous in memory */
|
||||
or r19, r20, r19 /* Merge the two partial loads into one quad */
|
||||
ADDC (r19, r2, r2) /* sum += ip->ip_{src,dst} */
|
||||
#endif /* INET */
|
||||
|
||||
|
||||
#if defined(INET) || defined(INET6)
|
||||
/*
 * Lno_pseudo: skip "off" bytes of the mbuf chain.  Each pass of
 * Lskip_loop loads the current mbuf's length, data pointer and next
 * pointer and subtracts the length from r8 (off).  The loop leaves for
 * Lskip_done once r8 goes negative, or for Lout_of_mbufs when the chain
 * ends first.
 */
Lno_pseudo:
|
||||
pta/u Lskip_loop, tr0
|
||||
pta/l Lskip_entry, tr1
|
||||
pta/u Lskip_done, tr2
|
||||
blink tr1, r63 /* Enter the loop at its test */
|
||||
|
||||
Lskip_loop:
|
||||
ld.l r5, M_LEN, r3
|
||||
LDPTR r5, M_DATA, r4
|
||||
sub r8, r3, r8 /* off -= m->m_len */
|
||||
LDPTR r5, M_NEXT, r5
|
||||
Lskip_entry:
|
||||
bgt/l r63, r8, tr2 /* Break loop if off < 0 */
|
||||
bne/l r5, r63, tr0 /* Go back until out of mbufs */
|
||||
|
||||
/*
|
||||
* Well wha' d'ya know, wan out of widdle piggies...
|
||||
*/
|
||||
pta/l Lout_of_mbufs, tr0
|
||||
blink tr0, r63
|
||||
|
||||
/*
 * Lskip_done: reached with r8 negative (off minus the lengths skipped so
 * far, including the current mbuf), r3 = that mbuf's length and r4 = its
 * data pointer.  Turns these into the start address and byte count of the
 * part of this mbuf to be summed, then joins the common checksum loop at
 * Lcksum_entry4 via Lcksum_top.
 */
Lskip_done:
|
||||
add r8, r4, r4
|
||||
xori r8, -1, r8
|
||||
add r3, r4, r4 /* r4 = data + m_len + (negative) r8: first byte to sum */
|
||||
addi r8, 1, r3 /* r3 = ~r8 + 1 = -r8: bytes left in this mbuf */
|
||||
pta/l Lcksum_entry4, tr2 /* Lcksum_top continues at tr2 */
|
||||
pta/l Lcksum_top, tr0
|
||||
blink tr0, r63
|
||||
|
||||
/*
 * Lout_of_mbufs: the chain ended before "off" bytes were skipped.  Loads
 * the address of the message below into r2 and branches to printf without
 * recording a new return address (r63 is the link register operand).
 * NOTE(review): printf presumably returns straight to in4_cksum's caller,
 * so the caller sees printf's return value rather than a checksum --
 * confirm this is intended.
 */
Lout_of_mbufs:
|
||||
LEA(Lmbuf_msg, r2)
|
||||
pta/l _C_LABEL(printf), tr0
|
||||
blink tr0, r63
|
||||
|
||||
Lmbuf_msg:
|
||||
.asciz "cksum: out of mbufs\n"
|
||||
#endif /* INET || INET6 */
|
|
@ -1,243 +0,0 @@
|
|||
/* $NetBSD: in_cksum.c,v 1.1 2002/07/05 13:32:05 scw Exp $ */
|
||||
|
||||
/*
|
||||
* XXX: This MUST be replaced with an assembly version. SH-5's "Multi-
|
||||
* Media" instruction set has some features which will make these
|
||||
* checksum routines very fast indeed.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 1988, 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
* Copyright (c) 1996
|
||||
* Matt Thomas <matt@3am-software.com>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
|
||||
|
||||
__KERNEL_RCSID(0, "$NetBSD: in_cksum.c,v 1.1 2002/07/05 13:32:05 scw Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/mbuf.h>
|
||||
#include <sys/systm.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/ip_var.h>
|
||||
|
||||
/*
|
||||
* Checksum routine for Internet Protocol family headers
|
||||
* (Based on Portable Alpha version).
|
||||
*
|
||||
* This routine is very heavily used in the network
|
||||
* code and should be modified for each CPU to be as fast as possible.
|
||||
*/
|
||||
|
||||
/*
 * Fold the end-around carry of a one's-complement sum: when "x" exceeds
 * 65535, subtract 65535 from it in place.  "x" must be a modifiable
 * lvalue and is evaluated more than once, so it must have no side effects.
 * The parameter is parenthesized so that any lvalue expression is safe.
 */
#define ADDCARRY(x) ((x) > 65535 ? (x) -= 65535 : (x))
|
||||
/*
 * Replace the 64-bit accumulator "sum" with the sum of its four 16-bit
 * words.  Expects a 64-bit variable "sum" and a "union q_util q_util"
 * scratch variable in the enclosing scope.
 *
 * Wrapped in do { } while (0) so that "REDUCE32;" is a single statement
 * and remains safe inside an unbraced if/else.
 */
#define REDUCE32							\
	do {								\
		q_util.q = sum;						\
		sum = q_util.s[0] + q_util.s[1] +			\
		    q_util.s[2] + q_util.s[3];				\
	} while (0)
|
||||
/*
 * Fold the 64-bit accumulator "sum" down to a 16-bit one's-complement
 * value: add its four 16-bit words, add the two halves of that 32-bit
 * result, then fold any remaining carry with ADDCARRY.  Expects "sum",
 * "union q_util q_util" and "union l_util l_util" in the enclosing scope.
 *
 * Wrapped in do { } while (0) so that "REDUCE16;" is a single statement
 * and remains safe inside an unbraced if/else.
 */
#define REDUCE16							\
	do {								\
		q_util.q = sum;						\
		l_util.l = q_util.s[0] + q_util.s[1] +			\
		    q_util.s[2] + q_util.s[3];				\
		sum = l_util.s[0] + l_util.s[1];			\
		ADDCARRY(sum);						\
	} while (0)
|
||||
|
||||
/*
 * Byte-selection masks for partial 32-bit words, stored as four rows of
 * four entries.  The row is the byte offset (0-3) of the data within its
 * aligned word; the column is the number of valid bytes (0-3).  Row 0 is
 * also indexed directly by the leftover byte count at the end of a buffer.
 * NOTE(review): the bit positions look like they assume little-endian
 * byte order -- confirm for big-endian configurations.
 */
static const u_int32_t in_masks[4 * 4] = {
	/* offset 0: 0, 1, 2, 3 bytes */
	0x00000000, 0x000000ff, 0x0000ffff, 0x00ffffff,
	/* offset 1: 0, 1, 2, 3 bytes */
	0x00000000, 0x0000ff00, 0x00ffff00, 0xffffff00,
	/* offset 2: 0, 1, 2, 3 bytes */
	0x00000000, 0x00ff0000, 0xffff0000, 0xffff0000,
	/* offset 3: 0, 1, 2, 3 bytes */
	0x00000000, 0xff000000, 0xff000000, 0xff000000,
};
|
||||
|
||||
/* Overlay giving access to a 32-bit value as two 16-bit words (used by REDUCE16). */
union l_util {
|
||||
u_int16_t s[2];
|
||||
u_int32_t l;
|
||||
};
|
||||
/* Overlay giving access to a 64-bit value as 16-bit or 32-bit words (used by REDUCE32/REDUCE16). */
union q_util {
|
||||
u_int16_t s[4];
|
||||
u_int32_t l[2];
|
||||
u_int64_t q;
|
||||
};
|
||||
|
||||
static u_int64_t
|
||||
in_cksumdata(register caddr_t buf, register int len)
|
||||
{
|
||||
const u_int32_t *lw = (u_int32_t *) buf;
|
||||
u_int64_t sum = 0;
|
||||
u_int64_t prefilled;
|
||||
int offset;
|
||||
union q_util q_util;
|
||||
|
||||
if ((3 & (uintptr_t) lw) == 0 && len == 20) {
|
||||
sum = (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3] + lw[4];
|
||||
REDUCE32;
|
||||
return sum;
|
||||
}
|
||||
|
||||
if ((offset = 3 & (uintptr_t) lw) != 0) {
|
||||
const u_int32_t *masks = in_masks + (offset << 2);
|
||||
lw = (u_int32_t *) (((uintptr_t) lw) - offset);
|
||||
sum = *lw++ & masks[len >= 3 ? 3 : len];
|
||||
len -= 4 - offset;
|
||||
if (len <= 0) {
|
||||
REDUCE32;
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* access prefilling to start load of next cache line.
|
||||
* then add current cache line
|
||||
* save result of prefilling for loop iteration.
|
||||
*/
|
||||
prefilled = lw[0];
|
||||
while ((len -= 32) >= 4) {
|
||||
u_int64_t prefilling = lw[8];
|
||||
sum += prefilled + lw[1] + lw[2] + lw[3]
|
||||
+ lw[4] + lw[5] + lw[6] + lw[7];
|
||||
lw += 8;
|
||||
prefilled = prefilling;
|
||||
}
|
||||
if (len >= 0) {
|
||||
sum += prefilled + lw[1] + lw[2] + lw[3]
|
||||
+ lw[4] + lw[5] + lw[6] + lw[7];
|
||||
lw += 8;
|
||||
} else {
|
||||
len += 32;
|
||||
}
|
||||
while ((len -= 16) >= 0) {
|
||||
sum += (u_int64_t) lw[0] + lw[1] + lw[2] + lw[3];
|
||||
lw += 4;
|
||||
}
|
||||
len += 16;
|
||||
while ((len -= 4) >= 0) {
|
||||
sum += (u_int64_t) *lw++;
|
||||
}
|
||||
len += 4;
|
||||
if (len > 0)
|
||||
sum += (u_int64_t) (in_masks[len] & *lw);
|
||||
REDUCE32;
|
||||
return sum;
|
||||
}
|
||||
|
||||
int
|
||||
in_cksum(register struct mbuf *m, register int len)
|
||||
{
|
||||
register u_int64_t sum = 0;
|
||||
register int mlen = 0;
|
||||
register int clen = 0;
|
||||
register caddr_t addr;
|
||||
union q_util q_util;
|
||||
union l_util l_util;
|
||||
|
||||
for (; m && len; m = m->m_next) {
|
||||
if (m->m_len == 0)
|
||||
continue;
|
||||
mlen = m->m_len;
|
||||
if (len < mlen)
|
||||
mlen = len;
|
||||
addr = mtod(m, caddr_t);
|
||||
if ((clen ^ (uintptr_t) addr) & 1)
|
||||
sum += in_cksumdata(addr, mlen) << 8;
|
||||
else
|
||||
sum += in_cksumdata(addr, mlen);
|
||||
|
||||
clen += mlen;
|
||||
len -= mlen;
|
||||
}
|
||||
REDUCE16;
|
||||
return (~sum & 0xffff);
|
||||
}
|
||||
|
||||
#if 0
|
||||
int
|
||||
in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len)
|
||||
{
|
||||
register u_int64_t sum = 0;
|
||||
register int mlen = 0;
|
||||
register int clen = 0;
|
||||
register caddr_t addr;
|
||||
union q_util q_util;
|
||||
union l_util l_util;
|
||||
struct ipovly ipov;
|
||||
|
||||
if (nxt != 0) {
|
||||
/* pseudo header */
|
||||
if (off < sizeof(struct ipovly))
|
||||
panic("in4_cksum: offset too short");
|
||||
if (m->m_len < sizeof(struct ip))
|
||||
panic("in4_cksum: bad mbuf chain");
|
||||
|
||||
memset(&ipov, 0, sizeof(ipov));
|
||||
|
||||
ipov.ih_len = htons(len);
|
||||
ipov.ih_pr = nxt;
|
||||
ipov.ih_src = mtod(m, struct ip *)->ip_src;
|
||||
ipov.ih_dst = mtod(m, struct ip *)->ip_dst;
|
||||
|
||||
sum += in_cksumdata((caddr_t) &ipov, sizeof(ipov));
|
||||
}
|
||||
|
||||
/* skip over unnecessary part */
|
||||
while (m != NULL && off > 0) {
|
||||
if (m->m_len > off)
|
||||
break;
|
||||
off -= m->m_len;
|
||||
m = m->m_next;
|
||||
}
|
||||
|
||||
for (; m && len; m = m->m_next, off = 0) {
|
||||
if (m->m_len == 0)
|
||||
continue;
|
||||
mlen = m->m_len - off;
|
||||
if (len < mlen)
|
||||
mlen = len;
|
||||
addr = mtod(m, caddr_t) + off;
|
||||
if ((clen ^ (u_int64_t) addr) & 1)
|
||||
sum += in_cksumdata(addr, mlen) << 8;
|
||||
else
|
||||
sum += in_cksumdata(addr, mlen);
|
||||
|
||||
clen += mlen;
|
||||
len -= mlen;
|
||||
}
|
||||
REDUCE16;
|
||||
return (~sum & 0xffff);
|
||||
}
|
||||
#endif
|
Loading…
Reference in New Issue