NetBSD/sys/netinet/ip_frag.c

464 lines
10 KiB
C
Raw Normal View History

1998-11-22 18:17:18 +03:00
/* $NetBSD: ip_frag.c,v 1.13 1998/11/22 15:17:19 mrg Exp $ */
/*
1998-11-22 18:17:18 +03:00
* Copyright (C) 1993-1998 by Darren Reed.
*
* Redistribution and use in source and binary forms are permitted
* provided that this notice is preserved and due credit is given
* to the original author and the contributors.
*/
#if !defined(lint)
static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-1995 Darren Reed";
1998-11-22 18:17:18 +03:00
static const char rcsid[] = "@(#)Id: ip_frag.c,v 2.0.2.19.2.6 1998/11/22 01:50:25 darrenr Exp ";
#endif
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/param.h>
1997-05-25 16:40:11 +04:00
#include <sys/time.h>
#include <sys/file.h>
1998-11-22 18:17:18 +03:00
#if !defined(_KERNEL) && !defined(KERNEL)
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
#endif
1997-05-25 16:40:11 +04:00
#if defined(KERNEL) && (__FreeBSD_version >= 220000)
#include <sys/filio.h>
#include <sys/fcntl.h>
#else
#include <sys/ioctl.h>
1997-05-25 16:40:11 +04:00
#endif
#include <sys/uio.h>
1997-11-14 15:40:06 +03:00
#ifndef linux
#include <sys/protosw.h>
1997-11-14 15:40:06 +03:00
#endif
#include <sys/socket.h>
1997-11-14 15:40:06 +03:00
#if defined(_KERNEL) && !defined(linux)
# include <sys/systm.h>
#endif
#if !defined(__SVR4) && !defined(__svr4__)
1997-11-14 15:40:06 +03:00
# ifndef linux
# include <sys/mbuf.h>
# endif
#else
# include <sys/byteorder.h>
# include <sys/dditypes.h>
# include <sys/stream.h>
# include <sys/kmem.h>
#endif
#include <net/if.h>
#ifdef sun
#include <net/af.h>
#endif
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
1997-11-14 15:40:06 +03:00
#ifndef linux
#include <netinet/ip_var.h>
1997-11-14 15:40:06 +03:00
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
1997-05-25 16:40:11 +04:00
#include "netinet/ip_compat.h"
1997-11-14 15:40:06 +03:00
#include <netinet/tcpip.h>
1997-05-25 16:40:11 +04:00
#include "netinet/ip_fil.h"
#include "netinet/ip_proxy.h"
#include "netinet/ip_nat.h"
#include "netinet/ip_frag.h"
#include "netinet/ip_state.h"
1997-07-05 09:38:14 +04:00
#include "netinet/ip_auth.h"
/*
 * Hash tables of cached fragment state.  ipfr_heads holds the entries
 * created by ipfr_newfrag() (filter results); ipfr_nattab holds the
 * entries created by ipfr_nat_newfrag() (links to NAT sessions).
 */
ipfr_t	*ipfr_heads[IPFT_SIZE];
ipfr_t	*ipfr_nattab[IPFT_SIZE];

/* Counters for this module; handed out by ipfr_fragstats(). */
ipfrstat_t	ipfr_stats;

/* Number of entries currently allocated (both tables together). */
int	ipfr_inuse = 0;

/*
 * Initial time-to-live given to a new entry, counted in calls to
 * ipfr_slowtimer().  That timer is expected to run twice per second,
 * so 120 corresponds to 60 seconds.
 */
int	fr_ipfrttl = 120;

#ifdef _KERNEL
extern	int	ipfr_timer_id;
#endif
#if	(SOLARIS || defined(__sgi)) && defined(_KERNEL)
extern	KRWLOCK_T	ipf_frag, ipf_natfrag, ipf_nat;
extern	kmutex_t	ipf_rw;
#endif

static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, int, ipfr_t **));
static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **));
ipfrstat_t *ipfr_fragstats()
{
ipfr_stats.ifs_table = ipfr_heads;
1997-05-25 16:40:11 +04:00
ipfr_stats.ifs_nattab = ipfr_nattab;
ipfr_stats.ifs_inuse = ipfr_inuse;
return &ipfr_stats;
}
/*
* add a new entry to the fragment cache, registering it as having come
* through this box, with the result of the filter operation.
*/
1997-05-25 16:40:11 +04:00
static ipfr_t *ipfr_new(ip, fin, pass, table)
ip_t *ip;
fr_info_t *fin;
int pass;
1997-05-25 16:40:11 +04:00
ipfr_t *table[];
{
ipfr_t **fp, *fr, frag;
u_int idx;
frag.ipfr_p = ip->ip_p;
idx = ip->ip_p;
frag.ipfr_id = ip->ip_id;
idx += ip->ip_id;
frag.ipfr_tos = ip->ip_tos;
frag.ipfr_src.s_addr = ip->ip_src.s_addr;
idx += ip->ip_src.s_addr;
frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
idx += ip->ip_dst.s_addr;
idx *= 127;
idx %= IPFT_SIZE;
/*
* first, make sure it isn't already there...
*/
1997-05-25 16:40:11 +04:00
for (fp = &table[idx]; (fr = *fp); fp = &fr->ipfr_next)
if (!bcmp((char *)&frag.ipfr_src, (char *)&fr->ipfr_src,
IPFR_CMPSZ)) {
1998-07-12 19:23:59 +04:00
ATOMIC_INC(ipfr_stats.ifs_exists);
1997-05-25 16:40:11 +04:00
return NULL;
}
1997-05-25 16:40:11 +04:00
/*
* allocate some memory, if possible, if not, just record that we
* failed to do so.
*/
KMALLOC(fr, ipfr_t *, sizeof(*fr));
if (fr == NULL) {
1998-07-12 19:23:59 +04:00
ATOMIC_INC(ipfr_stats.ifs_nomem);
1997-05-25 16:40:11 +04:00
return NULL;
}
1997-05-25 16:40:11 +04:00
/*
* Instert the fragment into the fragment table, copy the struct used
* in the search using bcopy rather than reassign each field.
* Set the ttl to the default and mask out logging from "pass"
*/
if ((fr->ipfr_next = table[idx]))
table[idx]->ipfr_prev = fr;
fr->ipfr_prev = NULL;
1997-05-25 16:40:11 +04:00
fr->ipfr_data = NULL;
table[idx] = fr;
bcopy((char *)&frag.ipfr_src, (char *)&fr->ipfr_src, IPFR_CMPSZ);
fr->ipfr_ttl = fr_ipfrttl;
fr->ipfr_pass = pass & ~(FR_LOGFIRST|FR_LOG);
1997-05-25 16:40:11 +04:00
/*
* Compute the offset of the expected start of the next packet.
*/
1998-11-22 18:17:18 +03:00
fr->ipfr_off = (ip->ip_off & IP_OFFMASK) + (fin->fin_dlen >> 3);
1998-07-12 19:23:59 +04:00
ATOMIC_INC(ipfr_stats.ifs_new);
ATOMIC_INC(ipfr_inuse);
1997-05-25 16:40:11 +04:00
return fr;
}
int ipfr_newfrag(ip, fin, pass)
ip_t *ip;
fr_info_t *fin;
int pass;
{
ipfr_t *ipf;
1998-07-12 19:23:59 +04:00
WRITE_ENTER(&ipf_frag);
1997-05-25 16:40:11 +04:00
ipf = ipfr_new(ip, fin, pass, ipfr_heads);
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_frag);
1997-05-25 16:40:11 +04:00
return ipf ? 0 : -1;
}
int ipfr_nat_newfrag(ip, fin, pass, nat)
ip_t *ip;
fr_info_t *fin;
int pass;
nat_t *nat;
{
ipfr_t *ipf;
1998-07-12 19:23:59 +04:00
WRITE_ENTER(&ipf_natfrag);
1998-11-22 18:17:18 +03:00
ipf = ipfr_new(ip, fin, pass, ipfr_nattab);
if (ipf != NULL) {
1997-05-25 16:40:11 +04:00
ipf->ipfr_data = nat;
nat->nat_data = ipf;
1997-05-25 16:40:11 +04:00
}
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_natfrag);
1997-05-25 16:40:11 +04:00
return ipf ? 0 : -1;
}
/*
* check the fragment cache to see if there is already a record of this packet
* with its filter result known.
*/
1997-05-25 16:40:11 +04:00
static ipfr_t *ipfr_lookup(ip, fin, table)
ip_t *ip;
fr_info_t *fin;
1997-05-25 16:40:11 +04:00
ipfr_t *table[];
{
ipfr_t *f, frag;
u_int idx;
/*
* For fragments, we record protocol, packet id, TOS and both IP#'s
* (these should all be the same for all fragments of a packet).
1997-05-25 16:40:11 +04:00
*
* build up a hash value to index the table with.
*/
frag.ipfr_p = ip->ip_p;
idx = ip->ip_p;
frag.ipfr_id = ip->ip_id;
idx += ip->ip_id;
frag.ipfr_tos = ip->ip_tos;
frag.ipfr_src.s_addr = ip->ip_src.s_addr;
idx += ip->ip_src.s_addr;
frag.ipfr_dst.s_addr = ip->ip_dst.s_addr;
idx += ip->ip_dst.s_addr;
idx *= 127;
idx %= IPFT_SIZE;
1997-05-25 16:40:11 +04:00
/*
* check the table, careful to only compare the right amount of data
*/
for (f = table[idx]; f; f = f->ipfr_next)
if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src,
IPFR_CMPSZ)) {
u_short atoff, off;
1997-05-25 16:40:11 +04:00
if (f != table[idx]) {
/*
* move fragment info. to the top of the list
* to speed up searches.
*/
if ((f->ipfr_prev->ipfr_next = f->ipfr_next))
f->ipfr_next->ipfr_prev = f->ipfr_prev;
1997-05-25 16:40:11 +04:00
f->ipfr_next = table[idx];
table[idx]->ipfr_prev = f;
f->ipfr_prev = NULL;
1997-05-25 16:40:11 +04:00
table[idx] = f;
}
off = ip->ip_off;
atoff = off + (fin->fin_dlen >> 3);
/*
* If we've follwed the fragments, and this is the
* last (in order), shrink expiration time.
*/
1998-11-22 18:17:18 +03:00
if ((off & IP_OFFMASK) == f->ipfr_off) {
if (!(off & IP_MF))
f->ipfr_ttl = 1;
else
f->ipfr_off = atoff;
}
1998-07-12 19:23:59 +04:00
ATOMIC_INC(ipfr_stats.ifs_hits);
1997-05-25 16:40:11 +04:00
return f;
}
1997-05-25 16:40:11 +04:00
return NULL;
}
/*
1997-07-05 09:38:14 +04:00
* functional interface for NAT lookups of the NAT fragment cache
1997-05-25 16:40:11 +04:00
*/
nat_t *ipfr_nat_knownfrag(ip, fin)
ip_t *ip;
fr_info_t *fin;
{
nat_t *nat;
ipfr_t *ipf;
1998-07-12 19:23:59 +04:00
READ_ENTER(&ipf_natfrag);
1997-07-05 09:38:14 +04:00
ipf = ipfr_lookup(ip, fin, ipfr_nattab);
1998-11-22 18:17:18 +03:00
if (ipf != NULL) {
nat = ipf->ipfr_data;
/*
* This is the last fragment for this packet.
*/
1998-11-22 18:17:18 +03:00
if ((ipf->ipfr_ttl == 1) && (nat != NULL)) {
nat->nat_data = NULL;
ipf->ipfr_data = NULL;
}
} else
nat = NULL;
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_natfrag);
1997-05-25 16:40:11 +04:00
return nat;
}
/*
1997-07-05 09:38:14 +04:00
* functional interface for normal lookups of the fragment cache
1997-05-25 16:40:11 +04:00
*/
int ipfr_knownfrag(ip, fin)
ip_t *ip;
fr_info_t *fin;
{
int ret;
ipfr_t *ipf;
1998-07-12 19:23:59 +04:00
READ_ENTER(&ipf_frag);
1997-05-25 16:40:11 +04:00
ipf = ipfr_lookup(ip, fin, ipfr_heads);
ret = ipf ? ipf->ipfr_pass : 0;
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_frag);
1997-05-25 16:40:11 +04:00
return ret;
}
/*
* forget any references to this external object.
*/
void ipfr_forget(nat)
void *nat;
{
ipfr_t *fr;
int idx;
1998-07-12 19:23:59 +04:00
WRITE_ENTER(&ipf_natfrag);
for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next)
if (fr->ipfr_data == nat)
fr->ipfr_data = NULL;
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_natfrag);
}
/*
* Free memory in use by fragment state info. kept.
*/
void ipfr_unload()
{
ipfr_t **fp, *fr;
1997-05-25 16:40:11 +04:00
nat_t *nat;
int idx;
1998-07-12 19:23:59 +04:00
WRITE_ENTER(&ipf_frag);
for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
for (fp = &ipfr_heads[idx]; (fr = *fp); ) {
*fp = fr->ipfr_next;
KFREE(fr);
}
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_frag);
1997-05-25 16:40:11 +04:00
1998-07-12 19:23:59 +04:00
WRITE_ENTER(&ipf_nat);
WRITE_ENTER(&ipf_natfrag);
1997-05-25 16:40:11 +04:00
for (idx = IPFT_SIZE - 1; idx >= 0; idx--)
for (fp = &ipfr_nattab[idx]; (fr = *fp); ) {
*fp = fr->ipfr_next;
1998-11-22 18:17:18 +03:00
nat = (nat_t *)fr->ipfr_data;
if (nat != NULL) {
if (nat->nat_data == fr)
nat->nat_data = NULL;
1997-05-25 16:40:11 +04:00
}
KFREE(fr);
}
1998-07-12 19:23:59 +04:00
RWLOCK_EXIT(&ipf_natfrag);
RWLOCK_EXIT(&ipf_nat);
}
#ifdef _KERNEL
/*
 * Slowly expire held state for fragments.  Timeouts are set in expectation
 * of this being called twice per second.
 *
 * Each call ages every entry in both fragment tables by one tick and frees
 * those whose ttl reaches zero, then runs the state, NAT and auth expiry
 * routines.  On Solaris it re-arms itself with timeout(); otherwise it
 * calls ip_slowtimo() (except on linux).  Where the function is declared
 * to return int, it returns 0.
 */
# if (BSD >= 199306) || SOLARIS || defined(__sgi)
void ipfr_slowtimer()
# else
int ipfr_slowtimer()
# endif
{
	ipfr_t **link, *ent;
	nat_t *owner;
	int s, bucket;

#ifdef __sgi
	ipfilter_sgi_intfsync();
#endif

	SPL_NET(s);
	WRITE_ENTER(&ipf_frag);

	/*
	 * Walk every bucket of the normal table, taking one tick off each
	 * entry's ttl.  "link" always addresses the pointer that refers to
	 * the current entry, so an expired entry can be unhooked in place.
	 */
	for (bucket = IPFT_SIZE - 1; bucket >= 0; bucket--) {
		link = &ipfr_heads[bucket];
		while ((ent = *link) != NULL) {
			--ent->ipfr_ttl;
			if (ent->ipfr_ttl != 0) {
				/* still alive, step to the next entry */
				link = &ent->ipfr_next;
				continue;
			}

			if (ent->ipfr_prev)
				ent->ipfr_prev->ipfr_next = ent->ipfr_next;
			if (ent->ipfr_next)
				ent->ipfr_next->ipfr_prev = ent->ipfr_prev;
			*link = ent->ipfr_next;
			ATOMIC_INC(ipfr_stats.ifs_expire);
			ATOMIC_DEC(ipfr_inuse);
			KFREE(ent);
		}
	}
	RWLOCK_EXIT(&ipf_frag);

	/*
	 * Same again for the NAT table, except that if the structure also
	 * still points to a NAT structure, and the NAT structure points back
	 * at the one to be free'd, NULL the reference from the NAT struct.
	 * NOTE: We need to grab both mutex's early, and in this order so as
	 * to prevent a deadlock if both try to expire at the same time.
	 */
	WRITE_ENTER(&ipf_nat);
	WRITE_ENTER(&ipf_natfrag);
	for (bucket = IPFT_SIZE - 1; bucket >= 0; bucket--) {
		link = &ipfr_nattab[bucket];
		while ((ent = *link) != NULL) {
			--ent->ipfr_ttl;
			if (ent->ipfr_ttl != 0) {
				link = &ent->ipfr_next;
				continue;
			}

			if (ent->ipfr_prev)
				ent->ipfr_prev->ipfr_next = ent->ipfr_next;
			if (ent->ipfr_next)
				ent->ipfr_next->ipfr_prev = ent->ipfr_prev;
			*link = ent->ipfr_next;
			ATOMIC_INC(ipfr_stats.ifs_expire);
			ATOMIC_DEC(ipfr_inuse);

			owner = (nat_t *)ent->ipfr_data;
			if ((owner != NULL) && (owner->nat_data == ent))
				owner->nat_data = NULL;
			KFREE(ent);
		}
	}
	RWLOCK_EXIT(&ipf_natfrag);
	RWLOCK_EXIT(&ipf_nat);
	SPL_X(s);

	fr_timeoutstate();
	ip_natexpire();
	fr_authexpire();
# if SOLARIS
	/* re-arm ourselves for half a second from now */
	ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000));
# else
#  ifndef linux
	ip_slowtimo();
#  endif
#  if (BSD < 199306) && !defined(__sgi)
	return 0;
#  endif
# endif
}
#endif /* defined(_KERNEL) */