Fast checksummer from Matthew Green.
This commit is contained in:
parent
7dbfb9ead9
commit
a1175c74a0
|
@ -1,17 +1,15 @@
|
|||
/* $NetBSD: in_cksum.c,v 1.2 1994/11/20 20:54:18 deraadt Exp $ */
|
||||
/* $NetBSD: in_cksum.c,v 1.3 1995/04/26 13:30:03 pk Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1995 Matthew Green.
|
||||
* Copyright (c) 1994 Charles Hannum.
|
||||
* Copyright (c) 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This software was developed by the Computer Systems Engineering group
|
||||
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
|
||||
* contributed to Berkeley.
|
||||
*
|
||||
* All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Lawrence Berkeley Laboratory.
|
||||
* California, and it's contributors.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
@ -47,53 +45,133 @@
|
|||
#include <sys/param.h>
|
||||
#include <sys/mbuf.h>
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_systm.h>
|
||||
|
||||
/*
|
||||
* Checksum routine for Internet Protocol family headers.
|
||||
*
|
||||
* This routine is very heavily used in the network
|
||||
* code and should be modified for each CPU to be as fast as possible.
|
||||
* In particular, it should not be this one.
|
||||
*
|
||||
* SPARC version.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The idea here is that we do as many 32 bit operations as possible
|
||||
* for maximum efficiency. We also unroll all loops into assembly.
|
||||
* This gains about 20% extra efficiency over the non-pipelined method.
|
||||
*
|
||||
* XXX - this code really needs further performance analysis. At the
|
||||
* moment it has only been run on a SPARC ELC.
|
||||
*/
|
||||
|
||||
/* Shorthand used by the ADD* macros to emit a volatile inline-assembly statement. */
#define Asm __asm __volatile
|
||||
/*
 * ADD32: add the 32 bytes at w (eight 32-bit words, offsets 0..28) into sum.
 * The words are loaded into %i0-%i5, %g3 and %g4 and accumulated with
 * addcc/addxcc so that the carry out of each add feeds the next one; the
 * trailing "addxcc %0,0,%0" adds the last carry back into sum.
 * Operates on the `sum' and `w' variables of the enclosing scope and does
 * not advance w (the call site follows it with SHIFT(32)).
 * NOTE(review): the adds modify the condition codes but "cc" is not in the
 * clobber list -- confirm that is acceptable for the compiler in use.
 */
#define ADD32 Asm(" ld [%2+28],%%i0; ld [%2+24],%%i1; \
|
||||
ld [%2+20],%%i2; ld [%2+16],%%i3; \
|
||||
ld [%2+12],%%i4; ld [%2+8],%%i5; \
|
||||
ld [%2+4],%%g3; ld [%2],%%g4; \
|
||||
addcc %0,%%i0,%0; addxcc %0,%%i1,%0; \
|
||||
addxcc %0,%%i2,%0; addxcc %0,%%i3,%0; \
|
||||
addxcc %0,%%i4,%0; addxcc %0,%%i5,%0; \
|
||||
addxcc %0,%%g3,%0; addxcc %0,%%g4,%0; \
|
||||
addxcc %0,0,%0" \
|
||||
: "=r" (sum) : "0" (sum), "r" (w) \
|
||||
: "%i0", "%i1", "%i2", "%i3", \
|
||||
"%i4", "%i5", "%g3", "%g4")
|
||||
/*
 * ADD16: add the 16 bytes at w (four 32-bit words, offsets 0..12) into sum,
 * chaining the carry through addcc/addxcc and adding the final carry back
 * with "addxcc %0,0,%0".  Clobbers %i0-%i3; uses the enclosing `sum' and
 * `w' and does not advance w.
 */
#define ADD16 Asm(" ld [%2+12],%%i0; ld [%2+8],%%i1; \
|
||||
ld [%2+4],%%i2; ld [%2],%%i3; \
|
||||
addcc %0,%%i0,%0; addxcc %0,%%i1,%0; \
|
||||
addxcc %0,%%i2,%0; addxcc %0,%%i3,%0; \
|
||||
addxcc %0,0,%0" \
|
||||
: "=r" (sum) : "0" (sum), "r" (w) \
|
||||
: "%i0", "%i1", "%i2", "%i3")
|
||||
/*
 * ADD8: add the 8 bytes at w (two 32-bit words, offsets 0 and 4) into sum,
 * with the carry chained through addcc/addxcc and the final carry added
 * back.  Clobbers %i0 and %i1; uses the enclosing `sum' and `w' and does
 * not advance w.
 */
#define ADD8 Asm(" ld [%2+4],%%i0; ld [%2],%%i1; \
|
||||
addcc %0,%%i0,%0; addxcc %0,%%i1,%0; \
|
||||
addxcc %0,0,%0" \
|
||||
: "=r" (sum) : "0" (sum), "r" (w) \
|
||||
: "%i0")
|
||||
/*
 * ADD4: add the single 32-bit word at w into sum and add the resulting
 * carry back with "addxcc %0,0,%0".  Clobbers %i0; uses the enclosing
 * `sum' and `w' and does not advance w.
 */
#define ADD4 Asm(" ld [%2],%%i0; addcc %0,%%i0,%0; \
|
||||
addxcc %0,0,%0" \
|
||||
: "=r" (sum) : "0" (sum), "r" (w) \
|
||||
: "%i0")
|
||||
|
||||
/* REDUCE: fold the upper 16 bits of sum into the lower 16 (result <= 0x1fffe). */
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
|
||||
/*
 * ADDCARRY: if sum no longer fits in 16 bits, subtract 0xffff; right after
 * a REDUCE this is the same as dropping bit 16 and adding 1.
 */
#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;}
|
||||
/*
 * ROL: move sum up by one byte.  This is a plain left shift, not a rotate,
 * so it relies on the upper bits having been cleared by a recent REDUCE.
 */
#define ROL {sum = sum << 8;} /* depends on recent REDUCE */
|
||||
/* ADDB: shift sum up one byte, add the single byte at w, and toggle byte_swapped. */
#define ADDB {ROL; sum += *w; byte_swapped ^= 1;}
|
||||
/* ADDS: add the 16-bit value at w into sum (w is read through a u_short pointer). */
#define ADDS {sum += *(u_short *)w;}
|
||||
/* SHIFT(n): consume n bytes -- advance w by n and reduce mlen by n. */
#define SHIFT(n) {w += n; mlen -= n;}
|
||||
|
||||
/*
 * in_cksum: compute the 16-bit Internet checksum over the first `len'
 * bytes of the mbuf chain starting at `m'.
 *
 * Returns the complement of the folded 16-bit sum; only the low 16 bits
 * of the result are ever set.
 *
 * NOTE(review): this listing shows the lines of a commit diff, so the
 * body below interleaves two alternative implementations: the earlier
 * halfword/byte loop (locals sum, i, oddbyte, v, cp; "while (len)") and
 * the newer word-at-a-time loop built on the ADD32/16/8/4 macros (locals
 * w, sum, mlen, byte_swapped; "for (; m && len; ...)").  Read them as two
 * separate bodies, not as one.
 */
int
|
||||
in_cksum(m, len)
|
||||
register struct mbuf *m;
|
||||
register int len;
|
||||
{
|
||||
register int sum = 0, i, oddbyte = 0, v = 0;
|
||||
register u_char *cp;
|
||||
register u_char *w;
|
||||
register u_int sum = 0;
|
||||
register int mlen = 0;
|
||||
int byte_swapped = 0;
|
||||

||||
/* we assume < 2^16 bytes being summed */
|
||||
while (len) {
|
||||
while ((i = m->m_len) == 0)
|
||||
m = m->m_next;
|
||||
if (i > len)
|
||||
i = len;
|
||||
len -= i;
|
||||
cp = mtod(m, u_char *);
|
||||
if (oddbyte) {
|
||||
sum += v + *cp++;
|
||||
i--;
|
||||
}
|
||||
if (((int)cp & 1) == 0) {
|
||||
while ((i -= 2) >= 0) {
|
||||
sum += *(u_short *)cp;
|
||||
cp += 2;
|
||||
for (; m && len; m = m->m_next) {
|
||||
if (m->m_len == 0)
|
||||
continue;
|
||||
w = mtod(m, u_char *);
|
||||
mlen = m->m_len;
|
||||
if (len < mlen)
|
||||
mlen = len;
|
||||
len -= mlen;
|
||||

||||
/*
|
||||
* Ensure that we're aligned on a word boundary here so
|
||||
* that we can do 32 bit operations below.
|
||||
*/
|
||||
if ((3 & (long)w) != 0) {
|
||||
REDUCE;
|
||||
if ((1 & (long)w) != 0 && mlen >= 1) {
|
||||
ADDB;
|
||||
SHIFT(1);
|
||||
}
|
||||
} else {
|
||||
while ((i -= 2) >= 0) {
|
||||
sum += *cp++ << 8;
|
||||
sum += *cp++;
|
||||
if ((2 & (long)w) != 0 && mlen >= 2) {
|
||||
ADDS;
|
||||
SHIFT(2);
|
||||
}
|
||||
}
|
||||
if ((oddbyte = i & 1) != 0)
|
||||
v = *cp << 8;
|
||||
m = m->m_next;
|
||||
/*
|
||||
* Do as many 32 bit operations as possible using the
|
||||
* 32/16/8/4 macros above, using as many as possible of
|
||||
* these.
|
||||
*/
|
||||
while (mlen >= 32) {
|
||||
ADD32;
|
||||
SHIFT(32);
|
||||
}
|
||||
if (mlen >= 16) {
|
||||
ADD16;
|
||||
SHIFT(16);
|
||||
}
|
||||
if (mlen >= 8) {
|
||||
ADD8;
|
||||
SHIFT(8);
|
||||
}
|
||||
if (mlen >= 4) {
|
||||
ADD4;
|
||||
SHIFT(4)
|
||||
}
|
||||
if (mlen == 0)
|
||||
continue;
|
||||

||||
/*
 * Fewer than four bytes remain in this mbuf: fold the sum, then add
 * a trailing halfword and/or a trailing single byte.
 */
REDUCE;
|
||||
if (mlen >= 2) {
|
||||
ADDS;
|
||||
SHIFT(2);
|
||||
}
|
||||
if (mlen == 1) {
|
||||
ADDB;
|
||||
}
|
||||
}
|
||||
if (oddbyte)
|
||||
sum += v;
|
||||
sum = (sum >> 16) + (sum & 0xffff); /* add in accumulated carries */
|
||||
sum += sum >> 16; /* add potential last carry */
|
||||
return (0xffff & ~sum);
|
||||
/*
 * byte_swapped is set when an odd number of ADDB steps has run, each
 * of which shifted the sum up one byte; apply one more byte shift
 * before the final fold.
 */
if (byte_swapped) {
|
||||
REDUCE;
|
||||
ROL;
|
||||
}
|
||||
REDUCE;
|
||||
ADDCARRY;
|
||||
return (0xffff ^ sum);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue