Fast checksummer from Matthew Green.

This commit is contained in:
pk 1995-04-26 13:30:03 +00:00
parent 7dbfb9ead9
commit a1175c74a0
1 changed files with 118 additions and 40 deletions

View File

@ -1,17 +1,15 @@
/* $NetBSD: in_cksum.c,v 1.2 1994/11/20 20:54:18 deraadt Exp $ */
/* $NetBSD: in_cksum.c,v 1.3 1995/04/26 13:30:03 pk Exp $ */
/*
* Copyright (c) 1995 Matthew Green.
* Copyright (c) 1994 Charles Hannum.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
* California, and it's contributors.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -47,53 +45,133 @@
#include <sys/param.h>
#include <sys/mbuf.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
/*
* Checksum routine for Internet Protocol family headers.
*
* This routine is very heavily used in the network
* code and should be modified for each CPU to be as fast as possible.
* In particular, it should not be this one.
*
* SPARC version.
*/
/*
* The idea here is that we do as many 32 bit operations as possible
* for maximum efficiency. We also unroll all loops into assembly.
* This gains about 20% extra efficiency over the non-pipelined method.
*
* XXX - this code really needs further performance analysis. At the
* moment it has only been run on a SPARC ELC.
*/
/* Shorthand for a volatile inline-assembly statement. */
#define Asm __asm __volatile
/*
 * ADD32 / ADD16 / ADD8 / ADD4: add the 32, 16, 8 or 4 bytes starting at
 * `w' into `sum', one 32-bit load (`ld') per word.
 *
 * The first add in each macro is `addcc' and the rest are `addxcc', so
 * the carry out of each add is fed into the next one; the trailing
 * `addxcc %0,0,%0' adds the last carry back into `sum'.
 *
 * Operands: %0 is `sum' (output, tied to the input copy of `sum'),
 * %2 is `w'.  Every scratch register named in the asm text is also
 * listed as a clobber.
 *
 * These macros use `sum' and `w' from the scope they are expanded in and
 * do not advance `w' or touch `mlen'; pair each use with SHIFT(n).
 *
 * NOTE(review): the word loads presumably require `w' to be 4-byte
 * aligned; in_cksum aligns `w' before its first use of these macros --
 * confirm for any other caller.
 */
#define ADD32 Asm(" ld [%2+28],%%i0; ld [%2+24],%%i1; \
ld [%2+20],%%i2; ld [%2+16],%%i3; \
ld [%2+12],%%i4; ld [%2+8],%%i5; \
ld [%2+4],%%g3; ld [%2],%%g4; \
addcc %0,%%i0,%0; addxcc %0,%%i1,%0; \
addxcc %0,%%i2,%0; addxcc %0,%%i3,%0; \
addxcc %0,%%i4,%0; addxcc %0,%%i5,%0; \
addxcc %0,%%g3,%0; addxcc %0,%%g4,%0; \
addxcc %0,0,%0" \
: "=r" (sum) : "0" (sum), "r" (w) \
: "%i0", "%i1", "%i2", "%i3", \
"%i4", "%i5", "%g3", "%g4")
/* Four words (16 bytes) at `w'; see the ADD32 notes above the first macro. */
#define ADD16 Asm(" ld [%2+12],%%i0; ld [%2+8],%%i1; \
ld [%2+4],%%i2; ld [%2],%%i3; \
addcc %0,%%i0,%0; addxcc %0,%%i1,%0; \
addxcc %0,%%i2,%0; addxcc %0,%%i3,%0; \
addxcc %0,0,%0" \
: "=r" (sum) : "0" (sum), "r" (w) \
: "%i0", "%i1", "%i2", "%i3")
/* Two words (8 bytes) at `w'. */
#define ADD8 Asm(" ld [%2+4],%%i0; ld [%2],%%i1; \
addcc %0,%%i0,%0; addxcc %0,%%i1,%0; \
addxcc %0,0,%0" \
: "=r" (sum) : "0" (sum), "r" (w) \
: "%i0", "%i1")
/* One word (4 bytes) at `w'. */
#define ADD4 Asm(" ld [%2],%%i0; addcc %0,%%i0,%0; \
addxcc %0,0,%0" \
: "=r" (sum) : "0" (sum), "r" (w) \
: "%i0")
/*
 * The remaining helpers expand to braced blocks that operate on the
 * caller's `sum', `w', `mlen' and `byte_swapped'.  Because the expansion
 * is a complete `{ ... }' block, a use does not need a trailing `;'.
 */
/* Fold the upper 16 bits of `sum' into the lower 16 (result may exceed 0xffff). */
#define REDUCE {sum = (sum & 0xffff) + (sum >> 16);}
/* Single end-around-carry step: bring a `sum' just above 0xffff back into 16 bits. */
#define ADDCARRY {if (sum > 0xffff) sum -= 0xffff;}
/*
 * Despite the name this is a plain left shift by 8, not a rotate; bits
 * shifted past the top of `sum' are discarded, hence the requirement
 * that `sum' was folded by REDUCE shortly before.
 */
#define ROL {sum = sum << 8;} /* depends on recent REDUCE */
/* Add the single byte at `w': shift `sum' left 8, add the byte, toggle `byte_swapped'. */
#define ADDB {ROL; sum += *w; byte_swapped ^= 1;}
/* Add the 16-bit value at `w' (read through a u_short pointer). */
#define ADDS {sum += *(u_short *)w;}
/* Consume `n' bytes: advance `w' and reduce the remaining count `mlen'. */
#define SHIFT(n) {w += n; mlen -= n;}
/*
 * in_cksum: checksum up to `len' bytes of the mbuf chain starting at `m'
 * and return the low 16 bits of the complemented sum.
 *
 *   m   - first mbuf of the chain to sum
 *   len - number of bytes to include
 *
 * NOTE(review): this listing appears to contain two revisions of the
 * function interleaved without +/- markers: `sum' is declared twice, a
 * `while (len)' loop and a `for' loop are both open at once, and
 * statements follow the first `return'.  Presumably the lines using
 * `i', `cp', `oddbyte' and `v' are the older byte/halfword loop and the
 * lines using `w', `mlen' and `byte_swapped' are the newer word-at-a-time
 * loop -- confirm against the repository before editing any code here.
 */
int
in_cksum(m, len)
register struct mbuf *m;
register int len;
{
register int sum = 0, i, oddbyte = 0, v = 0;
register u_char *cp;
/* w: read cursor in the current mbuf; mlen: bytes still to take from it. */
register u_char *w;
register u_int sum = 0;
register int mlen = 0;
/* Toggled by every ADDB; checked once after the loop. */
int byte_swapped = 0;
/* we assume < 2^16 bytes being summed */
while (len) {
/* Skip empty mbufs, then cap this mbuf's contribution at `len'. */
while ((i = m->m_len) == 0)
m = m->m_next;
if (i > len)
i = len;
len -= i;
cp = mtod(m, u_char *);
/* `v' holds the previous mbuf's trailing byte already shifted left 8. */
if (oddbyte) {
sum += v + *cp++;
i--;
}
/* 2-byte aligned: halfword loads; otherwise build each halfword from bytes. */
if (((int)cp & 1) == 0) {
while ((i -= 2) >= 0) {
sum += *(u_short *)cp;
cp += 2;
/* Walk the chain until it ends or `len' is used up; empty mbufs are skipped. */
for (; m && len; m = m->m_next) {
if (m->m_len == 0)
continue;
w = mtod(m, u_char *);
/* Take at most `len' bytes from this mbuf. */
mlen = m->m_len;
if (len < mlen)
mlen = len;
len -= mlen;
/*
* Ensure that we're aligned on a word boundary here so
* that we can do 32 bit operations below.
*/
if ((3 & (long)w) != 0) {
/* Fold first: ADDB shifts `sum' left by 8 (see ROL). */
REDUCE;
if ((1 & (long)w) != 0 && mlen >= 1) {
ADDB;
SHIFT(1);
}
} else {
while ((i -= 2) >= 0) {
sum += *cp++ << 8;
sum += *cp++;
if ((2 & (long)w) != 0 && mlen >= 2) {
ADDS;
SHIFT(2);
}
}
/* Remember a trailing odd byte for the next mbuf. */
if ((oddbyte = i & 1) != 0)
v = *cp << 8;
m = m->m_next;
/*
* Do as many 32 bit operations as possible using the
* 32/16/8/4 macros above, using as many as possible of
* these.
*/
while (mlen >= 32) {
ADD32;
SHIFT(32);
}
if (mlen >= 16) {
ADD16;
SHIFT(16);
}
if (mlen >= 8) {
ADD8;
SHIFT(8);
}
if (mlen >= 4) {
ADD4;
/* No `;' after SHIFT here; it expands to a braced block. */
SHIFT(4)
}
/* Nothing left in this mbuf. */
if (mlen == 0)
continue;
/* 1-3 bytes remain: fold, then add a halfword and/or a final byte. */
REDUCE;
if (mlen >= 2) {
ADDS;
SHIFT(2);
}
if (mlen == 1) {
ADDB;
}
}
if (oddbyte)
sum += v;
sum = (sum >> 16) + (sum & 0xffff); /* add in accumulated carries */
sum += sum >> 16; /* add potential last carry */
return (0xffff & ~sum);
/* ADDB ran an odd number of times: apply one more 8-bit shift to `sum'. */
if (byte_swapped) {
REDUCE;
ROL;
}
/* Final fold to 16 bits, then return the complement. */
REDUCE;
ADDCARRY;
return (0xffff ^ sum);
}