/* $NetBSD: ip_frag.c,v 1.25 2001/11/15 09:48:27 lukem Exp $ */ /* * Copyright (C) 1993-2000 by Darren Reed. * * Redistribution and use in source and binary forms are permitted * provided that this notice is preserved and due credit is given * to the original author and the contributors. */ #if !defined(lint) #if defined(__NetBSD__) #include __KERNEL_RCSID(0, "$NetBSD: ip_frag.c,v 1.25 2001/11/15 09:48:27 lukem Exp $"); #else static const char sccsid[] = "@(#)ip_frag.c 1.11 3/24/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)Id: ip_frag.c,v 2.10.2.7 2000/11/27 10:26:56 darrenr Exp"; #endif #endif #if defined(__FreeBSD__) && defined(KERNEL) && !defined(_KERNEL) # define _KERNEL #endif #include #include #include #include #if !defined(_KERNEL) && !defined(KERNEL) # include # include # include #endif #if (defined(KERNEL) || defined(_KERNEL)) && (__FreeBSD_version >= 220000) # include # include #else # include #endif #include #ifndef linux # include #endif #include #if defined(_KERNEL) && !defined(linux) # include #endif #if !defined(__SVR4) && !defined(__svr4__) # if defined(_KERNEL) && !defined(__sgi) # include # endif # ifndef linux # include # endif #else # include # ifdef _KERNEL # include # endif # include # include #endif #include #ifdef sun # include #endif #include #include #include #include #ifndef linux # include #endif #include #include #include #include "netinet/ip_compat.h" #include #include "netinet/ip_fil.h" #include "netinet/ip_proxy.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_auth.h" #if (__FreeBSD_version >= 300000) # include # if (defined(KERNEL) || defined(_KERNEL)) # ifndef IPFILTER_LKM # include # include # endif extern struct callout_handle ipfr_slowtimer_ch; # endif #endif #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000) # include extern struct callout ipfr_slowtimer_ch; #endif static ipfr_t *ipfr_heads[IPFT_SIZE]; static ipfr_t *ipfr_nattab[IPFT_SIZE]; static ipfrstat_t ipfr_stats; static int ipfr_inuse = 0; int fr_ipfrttl = 120; /* 60 seconds */ int fr_frag_lock = 0; #ifdef _KERNEL # if SOLARIS2 >= 7 extern timeout_id_t ipfr_timer_id; # else extern int ipfr_timer_id; # endif #endif #if (SOLARIS || defined(__sgi)) && defined(_KERNEL) extern KRWLOCK_T ipf_frag, ipf_natfrag, ipf_nat, ipf_mutex; # if SOLARIS extern KRWLOCK_T ipf_solaris; # else KRWLOCK_T ipf_solaris; # endif extern kmutex_t ipf_rw; #endif static ipfr_t *ipfr_new __P((ip_t *, fr_info_t *, u_int, ipfr_t **)); static ipfr_t *ipfr_lookup __P((ip_t *, fr_info_t *, ipfr_t **)); static void ipfr_delete __P((ipfr_t *)); ipfrstat_t *ipfr_fragstats() { ipfr_stats.ifs_table = ipfr_heads; ipfr_stats.ifs_nattab = ipfr_nattab; ipfr_stats.ifs_inuse = ipfr_inuse; return &ipfr_stats; } /* * add a new entry to the fragment cache, registering it as having come * through this box, with the result of the filter operation. */ static ipfr_t *ipfr_new(ip, fin, pass, table) ip_t *ip; fr_info_t *fin; u_int pass; ipfr_t *table[]; { ipfr_t **fp, *fra, frag; u_int idx, off; if (ipfr_inuse >= IPFT_SIZE) return NULL; if (!(fin->fin_fi.fi_fl & FI_FRAG)) return NULL; frag.ipfr_p = ip->ip_p; idx = ip->ip_p; frag.ipfr_id = ip->ip_id; idx += ip->ip_id; frag.ipfr_tos = ip->ip_tos; frag.ipfr_src.s_addr = ip->ip_src.s_addr; idx += ip->ip_src.s_addr; frag.ipfr_dst.s_addr = ip->ip_dst.s_addr; idx += ip->ip_dst.s_addr; frag.ipfr_ifp = fin->fin_ifp; idx *= 127; idx %= IPFT_SIZE; /* * first, make sure it isn't already there... */ for (fp = &table[idx]; (fra = *fp); fp = &fra->ipfr_next) if (!bcmp((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ)) { ATOMIC_INCL(ipfr_stats.ifs_exists); return NULL; } /* * allocate some memory, if possible, if not, just record that we * failed to do so. */ KMALLOC(fra, ipfr_t *); if (fra == NULL) { ATOMIC_INCL(ipfr_stats.ifs_nomem); return NULL; } if ((fra->ipfr_rule = fin->fin_fr) != NULL) { ATOMIC_INC32(fin->fin_fr->fr_ref); } /* * Instert the fragment into the fragment table, copy the struct used * in the search using bcopy rather than reassign each field. * Set the ttl to the default and mask out logging from "pass" */ if ((fra->ipfr_next = table[idx])) table[idx]->ipfr_prev = fra; fra->ipfr_prev = NULL; fra->ipfr_data = NULL; table[idx] = fra; bcopy((char *)&frag.ipfr_src, (char *)&fra->ipfr_src, IPFR_CMPSZ); fra->ipfr_ttl = fr_ipfrttl; /* * Compute the offset of the expected start of the next packet. */ off = ip->ip_off & IP_OFFMASK; if (!off) fra->ipfr_seen0 = 1; fra->ipfr_off = off + (fin->fin_dlen >> 3); ATOMIC_INCL(ipfr_stats.ifs_new); ATOMIC_INC32(ipfr_inuse); return fra; } int ipfr_newfrag(ip, fin, pass) ip_t *ip; fr_info_t *fin; u_int pass; { ipfr_t *ipf; if ((ip->ip_v != 4) || (fr_frag_lock)) return -1; WRITE_ENTER(&ipf_frag); ipf = ipfr_new(ip, fin, pass, ipfr_heads); RWLOCK_EXIT(&ipf_frag); return ipf ? 0 : -1; } int ipfr_nat_newfrag(ip, fin, pass, nat) ip_t *ip; fr_info_t *fin; u_int pass; nat_t *nat; { ipfr_t *ipf; if ((ip->ip_v != 4) || (fr_frag_lock)) return -1; WRITE_ENTER(&ipf_natfrag); ipf = ipfr_new(ip, fin, pass, ipfr_nattab); if (ipf != NULL) { ipf->ipfr_data = nat; nat->nat_data = ipf; } RWLOCK_EXIT(&ipf_natfrag); return ipf ? 0 : -1; } /* * check the fragment cache to see if there is already a record of this packet * with its filter result known. */ static ipfr_t *ipfr_lookup(ip, fin, table) ip_t *ip; fr_info_t *fin; ipfr_t *table[]; { ipfr_t *f, frag; u_int idx; if (!(fin->fin_fi.fi_fl & FI_FRAG)) return NULL; /* * For fragments, we record protocol, packet id, TOS and both IP#'s * (these should all be the same for all fragments of a packet). * * build up a hash value to index the table with. */ frag.ipfr_p = ip->ip_p; idx = ip->ip_p; frag.ipfr_id = ip->ip_id; idx += ip->ip_id; frag.ipfr_tos = ip->ip_tos; frag.ipfr_src.s_addr = ip->ip_src.s_addr; idx += ip->ip_src.s_addr; frag.ipfr_dst.s_addr = ip->ip_dst.s_addr; idx += ip->ip_dst.s_addr; frag.ipfr_ifp = fin->fin_ifp; idx *= 127; idx %= IPFT_SIZE; /* * check the table, careful to only compare the right amount of data */ for (f = table[idx]; f; f = f->ipfr_next) if (!bcmp((char *)&frag.ipfr_src, (char *)&f->ipfr_src, IPFR_CMPSZ)) { u_short atoff, off; /* * XXX - We really need to be guarding against the * retransmission of (src,dst,id,offset-range) here * because a fragmented packet is never resent with * the same IP ID#. */ off = ip->ip_off & IP_OFFMASK; if (f->ipfr_seen0) { if (!off || (fin->fin_fi.fi_fl & FI_SHORT)) continue; } else if (!off) f->ipfr_seen0 = 1; if (f != table[idx]) { /* * move fragment info. to the top of the list * to speed up searches. */ if ((f->ipfr_prev->ipfr_next = f->ipfr_next)) f->ipfr_next->ipfr_prev = f->ipfr_prev; f->ipfr_next = table[idx]; table[idx]->ipfr_prev = f; f->ipfr_prev = NULL; table[idx] = f; } atoff = off + (fin->fin_dlen >> 3); /* * If we've follwed the fragments, and this is the * last (in order), shrink expiration time. */ if (off == f->ipfr_off) { if (!(ip->ip_off & IP_MF)) f->ipfr_ttl = 1; else f->ipfr_off = atoff; } ATOMIC_INCL(ipfr_stats.ifs_hits); return f; } return NULL; } /* * functional interface for NAT lookups of the NAT fragment cache */ nat_t *ipfr_nat_knownfrag(ip, fin) ip_t *ip; fr_info_t *fin; { nat_t *nat; ipfr_t *ipf; if ((ip->ip_v != 4) || (fr_frag_lock)) return NULL; READ_ENTER(&ipf_natfrag); ipf = ipfr_lookup(ip, fin, ipfr_nattab); if (ipf != NULL) { nat = ipf->ipfr_data; /* * This is the last fragment for this packet. */ if ((ipf->ipfr_ttl == 1) && (nat != NULL)) { nat->nat_data = NULL; ipf->ipfr_data = NULL; } } else nat = NULL; RWLOCK_EXIT(&ipf_natfrag); return nat; } /* * functional interface for normal lookups of the fragment cache */ frentry_t *ipfr_knownfrag(ip, fin) ip_t *ip; fr_info_t *fin; { frentry_t *fr = NULL; ipfr_t *fra; if ((ip->ip_v != 4) || (fr_frag_lock)) return NULL; READ_ENTER(&ipf_frag); fra = ipfr_lookup(ip, fin, ipfr_heads); if (fra != NULL) fr = fra->ipfr_rule; RWLOCK_EXIT(&ipf_frag); return fr; } /* * forget any references to this external object. */ void ipfr_forget(nat) void *nat; { ipfr_t *fr; int idx; WRITE_ENTER(&ipf_natfrag); for (idx = IPFT_SIZE - 1; idx >= 0; idx--) for (fr = ipfr_heads[idx]; fr; fr = fr->ipfr_next) if (fr->ipfr_data == nat) fr->ipfr_data = NULL; RWLOCK_EXIT(&ipf_natfrag); } static void ipfr_delete(fra) ipfr_t *fra; { frentry_t *fr; fr = fra->ipfr_rule; if (fr != NULL) { ATOMIC_DEC32(fr->fr_ref); if (fr->fr_ref == 0) KFREE(fr); } if (fra->ipfr_prev) fra->ipfr_prev->ipfr_next = fra->ipfr_next; if (fra->ipfr_next) fra->ipfr_next->ipfr_prev = fra->ipfr_prev; KFREE(fra); } /* * Free memory in use by fragment state info. kept. */ void ipfr_unload() { ipfr_t **fp, *fra; nat_t *nat; int idx; WRITE_ENTER(&ipf_frag); for (idx = IPFT_SIZE - 1; idx >= 0; idx--) for (fp = &ipfr_heads[idx]; (fra = *fp); ) { *fp = fra->ipfr_next; ipfr_delete(fra); } RWLOCK_EXIT(&ipf_frag); WRITE_ENTER(&ipf_nat); WRITE_ENTER(&ipf_natfrag); for (idx = IPFT_SIZE - 1; idx >= 0; idx--) for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { *fp = fra->ipfr_next; nat = fra->ipfr_data; if (nat != NULL) { if (nat->nat_data == fra) nat->nat_data = NULL; } ipfr_delete(fra); } RWLOCK_EXIT(&ipf_natfrag); RWLOCK_EXIT(&ipf_nat); } #ifdef _KERNEL void ipfr_fragexpire() { ipfr_t **fp, *fra; nat_t *nat; int idx; #if defined(_KERNEL) # if !SOLARIS int s; # endif #endif if (fr_frag_lock) return; SPL_NET(s); WRITE_ENTER(&ipf_frag); /* * Go through the entire table, looking for entries to expire, * decreasing the ttl by one for each entry. If it reaches 0, * remove it from the chain and free it. */ for (idx = IPFT_SIZE - 1; idx >= 0; idx--) for (fp = &ipfr_heads[idx]; (fra = *fp); ) { --fra->ipfr_ttl; if (fra->ipfr_ttl == 0) { *fp = fra->ipfr_next; ipfr_delete(fra); ATOMIC_INCL(ipfr_stats.ifs_expire); ATOMIC_DEC32(ipfr_inuse); } else fp = &fra->ipfr_next; } RWLOCK_EXIT(&ipf_frag); /* * Same again for the NAT table, except that if the structure also * still points to a NAT structure, and the NAT structure points back * at the one to be free'd, NULL the reference from the NAT struct. * NOTE: We need to grab both mutex's early, and in this order so as * to prevent a deadlock if both try to expire at the same time. */ WRITE_ENTER(&ipf_nat); WRITE_ENTER(&ipf_natfrag); for (idx = IPFT_SIZE - 1; idx >= 0; idx--) for (fp = &ipfr_nattab[idx]; (fra = *fp); ) { --fra->ipfr_ttl; if (fra->ipfr_ttl == 0) { ATOMIC_INCL(ipfr_stats.ifs_expire); ATOMIC_DEC32(ipfr_inuse); nat = fra->ipfr_data; if (nat != NULL) { if (nat->nat_data == fra) nat->nat_data = NULL; } *fp = fra->ipfr_next; ipfr_delete(fra); } else fp = &fra->ipfr_next; } RWLOCK_EXIT(&ipf_natfrag); RWLOCK_EXIT(&ipf_nat); SPL_X(s); } /* * Slowly expire held state for fragments. Timeouts are set * in expectation * of this being called twice per second. */ # if (BSD >= 199306) || SOLARIS || defined(__sgi) # if defined(SOLARIS2) && (SOLARIS2 < 7) void ipfr_slowtimer() # else void ipfr_slowtimer __P((void *ptr)) # endif # else int ipfr_slowtimer() # endif { #if defined(_KERNEL) && SOLARIS extern int fr_running; if (fr_running <= 0) return; #endif READ_ENTER(&ipf_solaris); #ifdef __sgi ipfilter_sgi_intfsync(); #endif ipfr_fragexpire(); fr_timeoutstate(); ip_natexpire(); fr_authexpire(); # if SOLARIS ipfr_timer_id = timeout(ipfr_slowtimer, NULL, drv_usectohz(500000)); RWLOCK_EXIT(&ipf_solaris); # else # if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000) callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL); # else # if (__FreeBSD_version >= 300000) ipfr_slowtimer_ch = timeout(ipfr_slowtimer, NULL, hz/2); # else # if defined(__NetBSD__) callout_reset(&ipfr_slowtimer_ch, hz / 2, ipfr_slowtimer, NULL); # else timeout(ipfr_slowtimer, NULL, hz/2); # endif # endif # if (BSD < 199306) && !defined(__sgi) return 0; # endif /* FreeBSD */ # endif /* NetBSD */ # endif /* SOLARIS */ } #endif /* defined(_KERNEL) */