NPF checkpoint:

- Add proper TCP state tracking as described in the Guido van Rooij paper,
  plus handle the TCP Window Scaling option (an illustrative sketch of the
  window checks follows the commit metadata below).
- Completely rework npf_cache_t, reduce granularity, simplify code.
- Add npf_addr_t as an abstraction and amend the session handling code, as
  well as the NAT code et al., to use it.  The design is now prepared for
  IPv6 support.
- Handle IPv4 fragments, i.e. perform packet reassembly.
- Add support for IPv4 ID randomization and minimum TTL enforcement.
- Add support for TCP MSS "clamping".
- Random bits for IPv6.  Various fixes and clean-up.
Commit: 97b932f123 (parent b2d38cefdf)
Author: rmind
Date: 2010-11-11 06:30:39 +00:00
24 changed files with 1549 additions and 807 deletions
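The TCP state tracking added by this commit follows the Guido van Rooij approach: keep a small window descriptor per direction and accept a packet only if a handful of sequence/ACK inequalities hold. Below is a minimal illustrative sketch of those checks, not the code this commit adds in npf_state.c; the struct, the SEQ_* macros and MAXACKWINDOW are assumptions of the example, and the bookkeeping that updates the descriptors afterwards is omitted.

#include <stdbool.h>
#include <stdint.h>

/* Per-direction descriptor, mirroring npf_tcpstate_t in npf_impl.h below. */
typedef struct {
	uint32_t seqend;	/* highest SEQ + data length sent */
	uint32_t ackend;	/* highest ACK + (window << wscale) sent */
	uint32_t maxwin;	/* largest window ever advertised */
	int	 wscale;	/* negotiated window scale */
} tcp_dir_state_t;

#define	SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)
#define	SEQ_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)
#define	MAXACKWINDOW	66000	/* ACK slack, as suggested by the paper */

/* Packet in direction "fwd"; "rev" describes the opposite direction. */
static bool
tcp_in_window(const tcp_dir_state_t *fwd, const tcp_dir_state_t *rev,
    uint32_t seq, uint32_t ack, uint32_t dlen)
{
	const uint32_t seqend = seq + dlen;

	return
	    /* I.   Data fits into the window the peer advertised. */
	    SEQ_LEQ(seqend, rev->ackend) &&
	    /* II.  Data is not older than the peer's largest window allows. */
	    SEQ_GEQ(seqend, fwd->seqend - rev->maxwin) &&
	    /* III. It does not acknowledge data the peer never sent. */
	    SEQ_LEQ(ack, rev->seqend) &&
	    /* IV.  The acknowledgement is not unreasonably old. */
	    SEQ_GEQ(ack, rev->seqend - MAXACKWINDOW);
}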

File: Makefile

@ -1,4 +1,4 @@
# $NetBSD: Makefile,v 1.2 2010/09/16 04:53:27 rmind Exp $
# $NetBSD: Makefile,v 1.3 2010/11/11 06:30:39 rmind Exp $
.include "../Makefile.inc"
@ -8,6 +8,6 @@ KMOD= npf
SRCS= npf.c npf_ctl.c npf_handler.c npf_instr.c npf_mbuf.c
SRCS+= npf_processor.c npf_ruleset.c npf_tableset.c npf_inet.c
SRCS+= npf_session.c npf_nat.c npf_sendpkt.c npf_alg.c
SRCS+= npf_session.c npf_state.c npf_nat.c npf_alg.c npf_sendpkt.c
.include <bsd.kmodule.mk>

File: files.npf

@ -1,4 +1,4 @@
# $NetBSD: files.npf,v 1.2 2010/09/16 04:53:27 rmind Exp $
# $NetBSD: files.npf,v 1.3 2010/11/11 06:30:39 rmind Exp $
#
# Public Domain.
#
@ -20,6 +20,7 @@ file net/npf/npf_ruleset.c npf
file net/npf/npf_tableset.c npf
file net/npf/npf_inet.c npf
file net/npf/npf_session.c npf
file net/npf/npf_state.c npf
file net/npf/npf_nat.c npf
file net/npf/npf_alg.c npf
file net/npf/npf_sendpkt.c npf

File: npf.h

@ -1,4 +1,4 @@
/* $NetBSD: npf.h,v 1.3 2010/09/25 00:25:31 rmind Exp $ */
/* $NetBSD: npf.h,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -66,36 +66,57 @@ typedef struct npf_hook npf_hook_t;
typedef void nbuf_t;
#if defined(_KERNEL) || defined(_NPF_TESTING)
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
/*
* Storage of address, both IPv4 and IPv6.
*/
typedef struct in6_addr npf_addr_t;
/*
* Packet information cache.
*/
#define NPC_IP46 0x01 /* IPv4,6 packet with known protocol. */
#define NPC_IP6VER 0x02 /* If NPI_IP46, then: 0 - IPv4, 1 - IPv6. */
#define NPC_ADDRS 0x04 /* Known source and destination addresses. */
#define NPC_PORTS 0x08 /* Known ports (for TCP/UDP cases). */
#define NPC_ICMP 0x10 /* ICMP with known type and code. */
#define NPC_ICMP_ID 0x20 /* ICMP with query ID. */
#define NPC_IP4 0x01 /* Indicates fetched IPv4 header. */
#define NPC_IP6 0x02 /* Indicates IPv6 header. */
#define NPC_IPFRAG 0x04 /* IPv4 fragment. */
#define NPC_LAYER4 0x08 /* Layer 4 has been fetched. */
#define NPC_TCP 0x10 /* TCP header. */
#define NPC_UDP 0x20 /* UDP header. */
#define NPC_ICMP 0x40 /* ICMP header. */
#define NPC_ICMP_ID 0x80 /* ICMP with query ID. */
#define NPC_IP46 (NPC_IP4|NPC_IP6)
/* XXX: Optimise later, pack in unions, perhaps bitfields, etc. */
typedef struct {
/* Information flags and packet direction. */
uint32_t npc_info;
int npc_dir;
/* NPC_IP46 */
uint8_t npc_proto;
uint16_t npc_hlen;
uint16_t npc_ipsum;
/* NPC_ADDRS */
in_addr_t npc_srcip;
in_addr_t npc_dstip;
/* NPC_PORTS */
in_port_t npc_sport;
in_port_t npc_dport;
uint8_t npc_tcp_flags;
/* NPC_ICMP */
uint8_t npc_icmp_type;
uint8_t npc_icmp_code;
uint16_t npc_icmp_id;
int npc_di;
/* Pointers to the IP v4/v6 addresses. */
npf_addr_t * npc_srcip;
npf_addr_t * npc_dstip;
/* Size (v4 or v6) of IP addresses. */
int npc_ipsz;
/* IPv4, IPv6. */
union {
struct ip v4;
struct ip6_hdr v6;
} npc_ip;
/* TCP, UDP, ICMP. */
union {
struct tcphdr tcp;
struct udphdr udp;
struct icmp icmp;
} npc_l4;
} npf_cache_t;
static inline bool
@ -105,12 +126,20 @@ npf_iscached(const npf_cache_t *npc, const int inf)
return __predict_true((npc->npc_info & inf) != 0);
}
#if defined(_KERNEL) || defined(_NPF_TESTING)
static inline int
npf_cache_ipproto(const npf_cache_t *npc)
{
const struct ip *ip = &npc->npc_ip.v4;
KASSERT(npf_iscached(npc, NPC_IP46));
return ip->ip_p;
}
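For orientation, a hedged sketch (not part of the diff) of how a consumer of the reworked cache is expected to operate: test the NPC_* flag, fetch on demand, then read the cached header directly. The npf_instr.c hunks further down switch to exactly this pattern.

static int
example_tcp_sport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
	/* Fetch and cache the TCP header, unless it is cached already. */
	if (!npf_iscached(npc, NPC_TCP) && !npf_fetch_tcp(npc, nbuf, n_ptr)) {
		return -1;	/* not TCP, or the fetch failed */
	}
	/* The full header now sits in npc_l4 - no re-parsing needed. */
	return ntohs(npc->npc_l4.tcp.th_sport);
}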
/* Network buffer interface. */
void * nbuf_dataptr(void *);
void * nbuf_advance(nbuf_t **, void *, u_int);
int nbuf_advfetch(nbuf_t **, void **, u_int, size_t, void *);
int nbuf_advstore(nbuf_t **, void **, u_int, size_t, void *);
int nbuf_fetch_datum(nbuf_t *, void *, size_t, void *);
int nbuf_store_datum(nbuf_t *, void *, size_t, void *);
@ -118,30 +147,31 @@ int nbuf_add_tag(nbuf_t *, uint32_t, uint32_t);
int nbuf_find_tag(nbuf_t *, uint32_t, void **);
/* Ruleset interface. */
npf_rule_t * npf_rule_alloc(int, pri_t, int, void *, size_t);
npf_rule_t * npf_rule_alloc(int, pri_t, int, void *, size_t, bool, int, int);
void npf_rule_free(npf_rule_t *);
void npf_activate_rule(npf_rule_t *);
void npf_deactivate_rule(npf_rule_t *);
npf_hook_t * npf_hook_register(npf_rule_t *,
void (*)(const npf_cache_t *, void *), void *);
void (*)(npf_cache_t *, nbuf_t *, void *), void *);
void npf_hook_unregister(npf_rule_t *, npf_hook_t *);
#endif /* _KERNEL */
/* Rule attributes. */
#define NPF_RULE_PASS 0x0001
#define NPF_RULE_COUNT 0x0002
#define NPF_RULE_DEFAULT 0x0002
#define NPF_RULE_FINAL 0x0004
#define NPF_RULE_LOG 0x0008
#define NPF_RULE_DEFAULT 0x0010
#define NPF_RULE_KEEPSTATE 0x0020
#define NPF_RULE_KEEPSTATE 0x0008
#define NPF_RULE_COUNT 0x0010
#define NPF_RULE_LOG 0x0020
#define NPF_RULE_RETRST 0x0040
#define NPF_RULE_RETICMP 0x0080
#define NPF_RULE_NORMALIZE 0x0100
#define NPF_RULE_IN 0x1000
#define NPF_RULE_OUT 0x2000
#define NPF_RULE_DIMASK 0x3000
#define NPF_RULE_IN 0x10000000
#define NPF_RULE_OUT 0x20000000
#define NPF_RULE_DIMASK (NPF_RULE_IN | NPF_RULE_OUT)
/* Address translation types and flags. */
#define NPF_NATIN 1

File: npf_alg.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $ */
/* $NetBSD: npf_alg.c,v 1.2 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@ -31,16 +31,15 @@
/*
* NPF interface for application level gateways (ALGs).
*
* XXX: locking
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.2 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#endif
#include <sys/kmem.h>
#include <sys/pool.h>
#include <net/pfil.h>
@ -50,14 +49,14 @@ __KERNEL_RCSID(0, "$NetBSD: npf_alg.c,v 1.1 2010/08/22 18:56:22 rmind Exp $");
/* NAT ALG structure for registration. */
struct npf_alg {
LIST_ENTRY(npf_alg) na_entry;
void * na_ptr;
npf_alg_t * na_bptr;
npf_algfunc_t na_match_func;
npf_algfunc_t na_out_func;
npf_algfunc_t na_in_func;
npf_algfunc_t na_seid_func;
};
static LIST_HEAD(, npf_alg) nat_alg_list;
static LIST_HEAD(, npf_alg) nat_alg_list __read_mostly;
void
npf_alg_sysinit(void)
@ -85,7 +84,7 @@ npf_alg_register(npf_algfunc_t match, npf_algfunc_t out, npf_algfunc_t in,
npf_alg_t *alg;
alg = kmem_alloc(sizeof(npf_alg_t), KM_SLEEP);
alg->na_ptr = alg;
alg->na_bptr = alg;
alg->na_match_func = match;
alg->na_out_func = out;
alg->na_in_func = in;
@ -114,7 +113,10 @@ npf_alg_unregister(npf_alg_t *alg)
return 0;
}
void
/*
* npf_alg_match: call ALG matching inspectors, determine if any ALG matches.
*/
bool
npf_alg_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt)
{
npf_alg_t *alg;
@ -122,15 +124,15 @@ npf_alg_match(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt)
LIST_FOREACH(alg, &nat_alg_list, na_entry) {
func = alg->na_match_func;
if (__predict_true(func != NULL)) {
func(npc, nbuf, nt);
return;
if (func && func(npc, nbuf, nt)) {
return true;
}
}
return false;
}
/*
* npf_alg_exec: execute in/out inspection hooks of each ALG.
* npf_alg_exec: execute ALG hooks for translation.
*/
void
npf_alg_exec(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt, const int di)
@ -157,10 +159,7 @@ npf_alg_sessionid(npf_cache_t *npc, nbuf_t *nbuf, npf_cache_t *key)
LIST_FOREACH(alg, &nat_alg_list, na_entry) {
func = alg->na_seid_func;
if (__predict_true(func == NULL)) {
continue;
}
if (func(npc, nbuf, key)) {
if (func && func(npc, nbuf, (npf_nat_t *)key)) {
return true;
}
}

File: npf_alg_icmp.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_alg_icmp.c,v 1.3 2010/09/25 00:25:31 rmind Exp $ */
/* $NetBSD: npf_alg_icmp.c,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@ -33,13 +33,11 @@
* NPF ALG for ICMP and traceroute translations.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.3 2010/09/25 00:25:31 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_alg_icmp.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#endif
#include <sys/module.h>
#include <sys/pool.h>
@ -111,33 +109,37 @@ npf_alg_icmp_modcmd(modcmd_t cmd, void *arg)
}
/*
* npfa_icmp_match: ALG matching inspector, determines ALG case and
* establishes a session for "backwards" stream.
* npfa_icmp_match: ALG matching inspector - determines ALG case and
* associates ALG with NAT entry.
*/
static bool
npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
{
const int proto = npc->npc_proto;
void *n_ptr = nbuf_dataptr(nbuf);
u_int offby;
uint8_t ttl;
const int proto = npf_cache_ipproto(npc);
struct ip *ip = &npc->npc_ip.v4;
in_port_t dport;
/* Handle TCP/UDP traceroute - check for port range. */
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
if (proto == IPPROTO_TCP) {
struct tcphdr *th = &npc->npc_l4.tcp;
dport = ntohs(th->th_dport);
} else if (proto == IPPROTO_UDP) {
struct udphdr *uh = &npc->npc_l4.udp;
dport = ntohs(uh->uh_dport);
} else {
return false;
}
KASSERT(npf_iscached(npc, NPC_PORTS));
in_port_t dport = ntohs(npc->npc_dport);
/* Handle TCP/UDP traceroute - check for port range. */
if (dport < TR_BASE_PORT || dport > TR_PORT_RANGE) {
return false;
}
/* Check for low TTL. */
offby = offsetof(struct ip, ip_ttl);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &ttl))
return false;
if (ttl > TR_MAX_TTL)
if (ip->ip_ttl > TR_MAX_TTL) {
return false;
}
/* Associate ALG with translation entry. */
npf_nat_t *nt = ntptr;
@ -152,6 +154,7 @@ npfa_icmp_match(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
static inline bool
npf_icmp_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
struct icmp *ic;
u_int offby;
/* Per RFC 792. */
@ -167,17 +170,15 @@ npf_icmp_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
return false;
}
/* Fetch into the cache. */
if (!npf_ip4_proto(npc, nbuf, n_ptr)) {
if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
return false;
}
const int proto = npc->npc_proto;
if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
return false;
}
if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
return false;
}
if (!npf_fetch_ports(npc, nbuf, n_ptr, proto)) {
switch (npf_cache_ipproto(npc)) {
case IPPROTO_TCP:
return npf_fetch_tcp(npc, nbuf, n_ptr);
case IPPROTO_UDP:
return npf_fetch_udp(npc, nbuf, n_ptr);
default:
return false;
}
return true;
@ -189,9 +190,10 @@ npf_icmp_uniqid(const int type, npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
case ICMP_IREQ:
case ICMP_IREQREPLY:
/* Should contain ICMP query ID. */
ic = &npc->npc_l4.icmp;
offby = offsetof(struct icmp, icmp_id);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t),
&npc->npc_icmp_id)) {
if (nbuf_advfetch(&nbuf, &n_ptr, offby,
sizeof(uint16_t), &ic->icmp_id)) {
return false;
}
npc->npc_info |= NPC_ICMP_ID;
@ -210,52 +212,48 @@ static bool
npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
{
npf_cache_t *key = keyptr;
void *n_ptr;
/* ICMP? Get unique identifiers from ICMP packet. */
if (npc->npc_proto != IPPROTO_ICMP) {
if (npf_cache_ipproto(npc) != IPPROTO_ICMP) {
return false;
}
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ICMP));
KASSERT(npf_iscached(npc, NPC_IP46));
KASSERT(npf_iscached(npc, NPC_ICMP));
key->npc_info = NPC_ICMP;
/* Advance to ICMP header. */
n_ptr = nbuf_dataptr(nbuf);
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen)) == NULL) {
struct ip *ip = &npc->npc_ip.v4;
void *n_ptr = nbuf_dataptr(nbuf);
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, ip->ip_hl << 2)) == NULL) {
return false;
}
/* Fetch into the separate (key) cache. */
if (!npf_icmp_uniqid(npc->npc_icmp_type, key, nbuf, n_ptr)) {
/* Fetch relevant data into the separate ("key") cache. */
struct icmp *ic = &npc->npc_l4.icmp;
if (!npf_icmp_uniqid(ic->icmp_type, key, nbuf, n_ptr)) {
return false;
}
if (npf_iscached(key, NPC_ICMP_ID)) {
/* Construct the key. */
key->npc_proto = npc->npc_proto;
key->npc_dir = npc->npc_dir;
/* Save IP addresses. */
key->npc_srcip = npc->npc_srcip;
key->npc_dstip = npc->npc_dstip;
key->npc_info |= NPC_IP46 | NPC_ADDRS | NPC_PORTS;
/* Fake ports with ICMP query IDs. */
key->npc_sport = key->npc_icmp_id;
key->npc_dport = key->npc_icmp_id;
} else {
in_addr_t addr;
in_port_t port;
/*
* Embedded IP packet is the original of "forwards" stream.
* We should imitate the "backwards" stream for inspection.
*/
KASSERT(npf_iscached(key, NPC_IP46 | NPC_ADDRS | NPC_PORTS));
addr = key->npc_srcip;
port = key->npc_sport;
key->npc_srcip = key->npc_dstip;
key->npc_dstip = addr;
key->npc_sport = key->npc_dport;
key->npc_dport = port;
struct icmp *keyic = &key->npc_l4.icmp;
/* Copy ICMP ID to the cache and flag it. */
npc->npc_info |= NPC_ICMP_ID;
ic->icmp_id = keyic->icmp_id;
/* Note: return 'false', since key is the original cache. */
return false;
}
/*
* Embedded IP packet is the original of "forwards" stream.
* We should imitate the "backwards" stream for inspection.
*/
KASSERT(npf_iscached(key, NPC_IP46));
KASSERT(npf_iscached(key, NPC_LAYER4));
key->npc_di = (npc->npc_di == PFIL_IN) ? PFIL_OUT : PFIL_IN;
return true;
}
@ -266,61 +264,73 @@ npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
static bool
npfa_icmp_natin(npf_cache_t *npc, nbuf_t *nbuf, void *ntptr)
{
void *n_ptr = nbuf_dataptr(nbuf);
npf_cache_t enpc;
u_int offby;
uint16_t cksum;
/* XXX: Duplicated work. */
if (!npfa_icmp_session(npc, nbuf, &enpc)) {
return false;
}
KASSERT(npf_iscached(&enpc, NPC_IP46 | NPC_ADDRS | NPC_PORTS));
KASSERT(npf_iscached(&enpc, NPC_IP46 | NPC_LAYER4));
/* Advance to ICMP checksum and fetch it. */
offby = npc->npc_hlen + offsetof(struct icmp, icmp_cksum);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t), &cksum)) {
return false;
const int proto = npf_cache_ipproto(&enpc);
void *n_ptr = nbuf_dataptr(nbuf);
void *cnbuf = nbuf, *cnptr = n_ptr;
struct icmp *ic = &npc->npc_l4.icmp;
uint16_t cksum = ic->icmp_cksum;
struct ip *ip = &enpc.npc_ip.v4;
uint16_t ecksum = ip->ip_sum, l4cksum;
/* Save TCP/UDP checksum for update. */
if (proto == IPPROTO_TCP) {
struct tcphdr *th = &enpc.npc_l4.tcp;
l4cksum = th->th_sum;
} else {
struct udphdr *uh = &enpc.npc_l4.udp;
l4cksum = uh->uh_sum;
}
/* Save the data for checksum update later. */
void *cnbuf = nbuf, *cnptr = n_ptr;
uint16_t ecksum = enpc.npc_ipsum;
/* Advance to the original IP header, which is embedded after ICMP. */
offby = offsetof(struct icmp, icmp_ip) -
offsetof(struct icmp, icmp_cksum);
u_int offby = offsetof(struct icmp, icmp_ip);
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL) {
return false;
}
/*
* Rewrite source IP address and port of the embedded IP header,
* which represents original packet - therefore passing PFIL_OUT.
*/
npf_nat_t *nt = ntptr;
in_addr_t addr;
npf_addr_t *addr;
in_port_t port;
npf_nat_getorig(nt, &addr, &port);
/*
* Rewrite source IP address and port of the embedded IP header,
* which represents original packet - therefore passing PFIL_OUT.
* Note: checksum is first, since it uses values from the cache.
*/
if (!npf_rwrcksum(&enpc, nbuf, n_ptr, PFIL_OUT, addr, port)) {
return false;
}
if (!npf_rwrip(&enpc, nbuf, n_ptr, PFIL_OUT, addr)) {
return false;
}
if (!npf_rwrport(&enpc, nbuf, n_ptr, PFIL_OUT, port, addr)) {
if (!npf_rwrport(&enpc, nbuf, n_ptr, PFIL_OUT, port)) {
return false;
}
/*
* Fixup and update ICMP checksum.
* Note: npf_rwrip() has updated the IP checksum.
* Calculate ICMP checksum.
*/
cksum = npf_fixup32_cksum(cksum, enpc.npc_srcip, addr);
cksum = npf_fixup16_cksum(cksum, enpc.npc_sport, port);
cksum = npf_fixup16_cksum(cksum, ecksum, enpc.npc_ipsum);
/* FIXME: Updated UDP/TCP checksum joins-in too., when != 0, sigh. */
if (nbuf_store_datum(cnbuf, cnptr, sizeof(uint16_t), &cksum)){
return false;
if (proto == IPPROTO_TCP) {
struct tcphdr *th = &enpc.npc_l4.tcp;
cksum = npf_fixup16_cksum(cksum, th->th_sport, port);
cksum = npf_fixup16_cksum(cksum, l4cksum, th->th_sum);
} else {
struct udphdr *uh = &enpc.npc_l4.udp;
cksum = npf_fixup16_cksum(cksum, uh->uh_sport, port);
cksum = npf_fixup16_cksum(cksum, l4cksum, uh->uh_sum);
}
return true;
cksum = npf_addr_cksum(cksum, enpc.npc_ipsz, enpc.npc_srcip, addr);
cksum = npf_fixup16_cksum(cksum, ecksum, ip->ip_sum);
/* Rewrite ICMP checksum. */
return nbuf_store_datum(cnbuf, cnptr, sizeof(uint16_t), &cksum);
}

File: npf_ctl.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_ctl.c,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_ctl.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -39,14 +39,12 @@
* - Consider implementing 'sync' functionality.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_ctl.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#endif
#include <prop/proplib.h>
@ -190,8 +188,9 @@ npf_mk_singlerule(prop_dictionary_t rldict,
{
npf_rule_t *rl;
prop_object_t obj;
int attr, ifidx;
int attr, ifidx, minttl, maxmss;
pri_t pri;
bool rnd_ipid;
size_t nc_size;
void *nc;
@ -211,6 +210,18 @@ npf_mk_singlerule(prop_dictionary_t rldict,
obj = prop_dictionary_get(rldict, "interface");
ifidx = prop_number_integer_value(obj);
/* Randomize IP ID (bool). */
obj = prop_dictionary_get(rldict, "randomize-id");
rnd_ipid = prop_bool_true(obj);
/* Minimum IP TTL (integer). */
obj = prop_dictionary_get(rldict, "min-ttl");
minttl = prop_number_integer_value(obj);
/* Maximum TCP MSS (integer). */
obj = prop_dictionary_get(rldict, "max-mss");
maxmss = prop_number_integer_value(obj);
/* N-code (binary data). */
obj = prop_dictionary_get(rldict, "ncode");
if (obj) {
@ -233,7 +244,8 @@ npf_mk_singlerule(prop_dictionary_t rldict,
}
/* Allocate and setup NPF rule. */
rl = npf_rule_alloc(attr, pri, ifidx, nc, nc_size);
rl = npf_rule_alloc(attr, pri, ifidx, nc, nc_size,
rnd_ipid, minttl, maxmss);
if (rl == NULL) {
if (nc) {
npf_ncode_free(nc, nc_size); /* XXX */
@ -328,7 +340,8 @@ npf_mk_natlist(npf_ruleset_t *nset, prop_array_t natlist)
prop_object_t obj;
npf_natpolicy_t *np;
npf_rule_t *rl;
in_addr_t taddr;
const npf_addr_t *taddr;
size_t taddr_sz;
in_port_t tport;
int type, flags;
@ -347,12 +360,13 @@ npf_mk_natlist(npf_ruleset_t *nset, prop_array_t natlist)
flags = prop_number_integer_value(obj);
/* Translation IP. */
obj = prop_dictionary_get(natdict, "translation_ip");
taddr = (in_addr_t)prop_number_integer_value(obj);
obj = prop_dictionary_get(natdict, "translation-ip");
taddr_sz = prop_data_size(obj);
taddr = (const npf_addr_t *)prop_data_data_nocopy(obj);
/* Translation port (for redirect case). */
obj = prop_dictionary_get(natdict, "translation_port");
tport = (in_addr_t)prop_number_integer_value(obj);
obj = prop_dictionary_get(natdict, "translation-port");
tport = (in_port_t)prop_number_integer_value(obj);
/*
* NAT policies are standard rules, plus additional
@ -363,7 +377,7 @@ npf_mk_natlist(npf_ruleset_t *nset, prop_array_t natlist)
break;
/* Allocate a new NAT policy and assign to the rule. */
np = npf_nat_newpolicy(type, flags, taddr, tport);
np = npf_nat_newpolicy(type, flags, taddr, taddr_sz, tport);
if (np == NULL) {
error = ENOMEM;
break;

File: npf_handler.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_handler.c,v 1.3 2010/10/10 15:29:01 rmind Exp $ */
/* $NetBSD: npf_handler.c,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.3 2010/10/10 15:29:01 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -45,6 +45,10 @@ __KERNEL_RCSID(0, "$NetBSD: npf_handler.c,v 1.3 2010/10/10 15:29:01 rmind Exp $"
#include <net/pfil.h>
#include <sys/socketvar.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include "npf_impl.h"
/*
@ -91,8 +95,28 @@ npf_packet_handler(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
error = 0;
retfl = 0;
/* Cache everything. Determine whether it is an IPv4 fragment. */
if (npf_cache_all(&npc, nbuf) && npf_iscached(&npc, NPC_IPFRAG)) {
struct ip *ip = nbuf_dataptr(*mp);
/*
* Pass to IPv4 reassembly mechanism.
*/
if (ip_reass_packet(mp, ip) != 0) {
/* Failed; invalid fragment(s) or packet. */
error = EINVAL;
se = NULL;
goto out;
}
if (*mp == NULL) {
/* More fragments should come; return. */
return 0;
}
/* Reassembly is complete, we have the final packet. */
nbuf = (nbuf_t *)*mp;
}
/* Inspect the list of sessions. */
se = npf_session_inspect(&npc, nbuf, ifp, di);
se = npf_session_inspect(&npc, nbuf, di);
/* If "passing" session found - skip the ruleset inspection. */
if (se && npf_session_pass(se)) {
@ -110,14 +134,14 @@ npf_packet_handler(void *arg, struct mbuf **mp, struct ifnet *ifp, int di)
}
/* Apply the rule. */
error = npf_rule_apply(&npc, rl, &keepstate, &retfl);
error = npf_rule_apply(&npc, nbuf, rl, &keepstate, &retfl);
if (error) {
goto out;
}
/* Establish a "pass" session, if required. */
if (keepstate && !se) {
se = npf_session_establish(&npc, NULL, di);
se = npf_session_establish(&npc, nbuf, NULL, di);
if (se == NULL) {
error = ENOMEM;
goto out;

File: npf_impl.h

@ -1,4 +1,4 @@
/* $NetBSD: npf_impl.h,v 1.3 2010/09/25 01:42:39 matt Exp $ */
/* $NetBSD: npf_impl.h,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -37,10 +37,10 @@
#ifndef _NPF_IMPL_H_
#define _NPF_IMPL_H_
#include <sys/rbtree.h>
#include <sys/hash.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/hash.h>
#include <sys/rbtree.h>
#include <sys/rwlock.h>
#include "npf.h"
@ -76,11 +76,33 @@ typedef npf_table_t * npf_tableset_t;
* DEFINITIONS.
*/
typedef bool (*npf_algfunc_t)(npf_cache_t *, void *, void *);
typedef bool (*npf_algfunc_t)(npf_cache_t *, nbuf_t *, void *);
#define NPF_NCODE_LIMIT 1024
#define NPF_TABLE_SLOTS 32
/*
* SESSION STATE STRUCTURES
*/
#define ST_OPENING 1 /* SYN has been sent. */
#define ST_ACKNOWLEDGE 2 /* SYN-ACK received, wait for ACK. */
#define ST_ESTABLISHED 3 /* ACK seen, connection established. */
#define ST_CLOSING 4
typedef struct {
uint32_t nst_seqend; /* SEQ number + length. */
uint32_t nst_ackend; /* ACK sequence number + window. */
uint32_t nst_maxwin; /* Maximum window seen. */
int nst_wscale; /* Window Scale. */
} npf_tcpstate_t;
typedef struct {
kmutex_t nst_lock;
int nst_state;
npf_tcpstate_t nst_tcpst[2];
} npf_state_t;
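A hedged illustration of how the ST_* values above are expected to advance during the three-way handshake. The real transition logic lives in the newly added npf_state.c, whose diff is not part of this listing, so treat the function below purely as a sketch (the TH_* flags are the usual <netinet/tcp.h> definitions).

static int
handshake_step(int state, int tcpfl)
{
	switch (state) {
	case ST_OPENING:
		/* Our SYN is out; the peer's SYN-ACK moves us forward. */
		if ((tcpfl & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK))
			return ST_ACKNOWLEDGE;
		break;
	case ST_ACKNOWLEDGE:
		/* The final ACK completes the handshake. */
		if (tcpfl & TH_ACK)
			return ST_ESTABLISHED;
		break;
	case ST_ESTABLISHED:
		/* A FIN or RST begins connection teardown. */
		if (tcpfl & (TH_FIN | TH_RST))
			return ST_CLOSING;
		break;
	}
	return state;
}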
/*
* INTERFACES.
*/
@ -95,20 +117,27 @@ int npf_register_pfil(void);
void npf_unregister_pfil(void);
/* Protocol helpers. */
bool npf_ip4_proto(npf_cache_t *, nbuf_t *, void *);
bool npf_fetch_ip4addrs(npf_cache_t *, nbuf_t *, void *);
bool npf_fetch_ports(npf_cache_t *, nbuf_t *, void *, const int);
bool npf_fetch_tcpfl(npf_cache_t *, nbuf_t *, void *);
bool npf_fetch_ip(npf_cache_t *, nbuf_t *, void *);
bool npf_fetch_tcp(npf_cache_t *, nbuf_t *, void *);
bool npf_fetch_udp(npf_cache_t *, nbuf_t *, void *);
bool npf_fetch_icmp(npf_cache_t *, nbuf_t *, void *);
bool npf_cache_all(npf_cache_t *, nbuf_t *);
bool npf_rwrip(npf_cache_t *, nbuf_t *, void *, const int,
npf_addr_t *);
bool npf_rwrport(npf_cache_t *, nbuf_t *, void *, const int,
in_port_t, in_addr_t);
bool npf_rwrip(npf_cache_t *, nbuf_t *, void *, const int, in_addr_t);
in_port_t);
bool npf_rwrcksum(npf_cache_t *, nbuf_t *, void *, const int,
npf_addr_t *, in_port_t);
uint16_t npf_fixup16_cksum(uint16_t, uint16_t, uint16_t);
uint16_t npf_fixup32_cksum(uint16_t, uint32_t, uint32_t);
uint16_t npf_addr_cksum(uint16_t, int, npf_addr_t *, npf_addr_t *);
uint32_t npf_addr_sum(const int, const npf_addr_t *, const npf_addr_t *);
int npf_tcpsaw(npf_cache_t *, tcp_seq *, tcp_seq *, uint32_t *);
bool npf_fetch_tcpopts(const npf_cache_t *, nbuf_t *,
uint16_t *, int *);
bool npf_normalize(npf_cache_t *, nbuf_t *, bool, u_int, u_int);
void npf_return_block(npf_cache_t *, nbuf_t *, const int);
/* Complex instructions. */
@ -121,8 +150,8 @@ int npf_match_tcp_ports(npf_cache_t *, nbuf_t *, void *,
const int, const uint32_t);
int npf_match_udp_ports(npf_cache_t *, nbuf_t *, void *,
const int, const uint32_t);
int npf_match_icmp4(npf_cache_t *, nbuf_t *, void *, const uint32_t);
int npf_match_tcpfl(npf_cache_t *, nbuf_t *, void *, const uint32_t);
int npf_match_icmp4(npf_cache_t *, nbuf_t *, void *, uint32_t);
int npf_match_tcpfl(npf_cache_t *, nbuf_t *, void *, uint32_t);
/* Tableset interface. */
int npf_tableset_sysinit(void);
@ -160,20 +189,20 @@ npf_rule_t * npf_ruleset_match(npf_ruleset_t *, npf_cache_t *, nbuf_t *,
struct ifnet *, const int, const int);
npf_rule_t * npf_ruleset_inspect(npf_cache_t *, nbuf_t *,
struct ifnet *, const int, const int);
int npf_rule_apply(const npf_cache_t *, npf_rule_t *, bool *, int *);
int npf_rule_apply(npf_cache_t *, nbuf_t *, npf_rule_t *,
bool *, int *);
npf_ruleset_t * npf_rule_subset(npf_rule_t *);
npf_natpolicy_t *npf_rule_getnat(const npf_rule_t *);
void npf_rule_setnat(npf_rule_t *, npf_natpolicy_t *);
/* State handling interface. */
/* Session handling interface. */
int npf_session_sysinit(void);
void npf_session_sysfini(void);
int npf_session_tracking(bool);
npf_session_t * npf_session_inspect(npf_cache_t *, nbuf_t *,
struct ifnet *, const int);
npf_session_t * npf_session_establish(const npf_cache_t *,
npf_session_t * npf_session_inspect(npf_cache_t *, nbuf_t *, const int);
npf_session_t * npf_session_establish(const npf_cache_t *, nbuf_t *,
npf_nat_t *, const int);
void npf_session_release(npf_session_t *);
bool npf_session_pass(const npf_session_t *);
@ -181,10 +210,18 @@ void npf_session_setpass(npf_session_t *);
void npf_session_link(npf_session_t *, npf_session_t *);
npf_nat_t * npf_session_retnat(npf_session_t *, const int, bool *);
/* State handling. */
bool npf_state_init(const npf_cache_t *, nbuf_t *, npf_state_t *);
bool npf_state_inspect(const npf_cache_t *, nbuf_t *, npf_state_t *,
const bool);
int npf_state_etime(const npf_state_t *, const int);
void npf_state_destroy(npf_state_t *);
/* NAT. */
void npf_nat_sysinit(void);
void npf_nat_sysfini(void);
npf_natpolicy_t *npf_nat_newpolicy(int, int, in_addr_t, in_port_t);
npf_natpolicy_t *npf_nat_newpolicy(int, int, const npf_addr_t *, size_t,
in_port_t);
void npf_nat_freepolicy(npf_natpolicy_t *);
void npf_nat_flush(void);
void npf_nat_reload(npf_ruleset_t *);
@ -192,7 +229,7 @@ void npf_nat_reload(npf_ruleset_t *);
int npf_do_nat(npf_cache_t *, npf_session_t *, nbuf_t *,
struct ifnet *, const int);
void npf_nat_expire(npf_nat_t *);
void npf_nat_getorig(npf_nat_t *, in_addr_t *, in_port_t *);
void npf_nat_getorig(npf_nat_t *, npf_addr_t **, in_port_t *);
void npf_nat_setalg(npf_nat_t *, npf_alg_t *, uintptr_t);
/* ALG interface. */
@ -201,13 +238,14 @@ void npf_alg_sysfini(void);
npf_alg_t * npf_alg_register(npf_algfunc_t, npf_algfunc_t,
npf_algfunc_t, npf_algfunc_t);
int npf_alg_unregister(npf_alg_t *);
void npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *);
bool npf_alg_match(npf_cache_t *, nbuf_t *, npf_nat_t *);
void npf_alg_exec(npf_cache_t *, nbuf_t *, npf_nat_t *, const int );
bool npf_alg_sessionid(npf_cache_t *, nbuf_t *, npf_cache_t *);
/* Debugging routines. */
void npf_rulenc_dump(npf_rule_t *);
void npf_sessions_dump(void);
void npf_state_dump(npf_state_t *);
void npf_nat_dump(npf_nat_t *);
#endif

File: npf_inet.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_inet.c,v 1.3 2010/09/25 00:25:31 rmind Exp $ */
/* $NetBSD: npf_inet.c,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -33,25 +33,25 @@
* Various protocol related helper routines.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.3 2010/09/25 00:25:31 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <net/pfil.h>
#include <net/if.h>
#include <net/ethertypes.h>
#include <net/if_ether.h>
#endif
#include <net/pfil.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include "npf_impl.h"
@ -86,117 +86,234 @@ npf_fixup32_cksum(uint16_t cksum, uint32_t odatum, uint32_t ndatum)
}
/*
* npf_ip4_proto: check IPv4 header length and match protocol number.
*
* => Returns pointer to protocol header or NULL on failure.
* => Stores protocol number in the cache.
* => Updates nbuf pointer to header's nbuf.
* npf_addr_cksum: calculate checksum of the address, either IPv4 or IPv6.
*/
bool
npf_ip4_proto(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
uint16_t
npf_addr_cksum(uint16_t cksum, int sz, npf_addr_t *oaddr, npf_addr_t *naddr)
{
u_int hlen, offby;
uint8_t val8;
int error;
uint32_t *oip32 = (uint32_t *)oaddr, *nip32 = (uint32_t *)naddr;
/* IPv4 header: check IP version and header length. */
error = nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &val8);
if (error || (val8 >> 4) != IPVERSION)
return false;
hlen = (val8 & 0xf) << 2;
if (hlen < sizeof(struct ip))
return false;
KASSERT(sz % sizeof(uint32_t) == 0);
do {
cksum = npf_fixup32_cksum(cksum, *oip32++, *nip32++);
sz -= sizeof(uint32_t);
} while (sz);
/* IPv4 header: check fragment offset. */
offby = offsetof(struct ip, ip_off);
error = nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &val8);
if (error || (val8 & ~htons(IP_DF | IP_RF)))
return false;
/* Get and match protocol. */
KASSERT(offsetof(struct ip, ip_p) > offby);
offby = offsetof(struct ip, ip_p) - offby;
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &val8))
return false;
/* IP checksum. */
offby = offsetof(struct ip, ip_sum) - offsetof(struct ip, ip_p);
if (nbuf_advfetch(&nbuf, &n_ptr, offby,
sizeof(uint16_t), &npc->npc_ipsum))
return false;
/* Cache: IPv4, protocol, header length. */
npc->npc_info |= NPC_IP46;
npc->npc_proto = val8;
npc->npc_hlen = hlen;
return true;
return cksum;
}
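npf_addr_cksum() above folds an address change into an existing checksum one 32-bit word at a time via npf_fixup32_cksum(), which is defined earlier in npf_inet.c (outside this hunk). All the rewrite paths in this diff rely on the incremental update of RFC 1624 rather than recomputing sums from scratch; a standalone sketch of that update for a single 16-bit field (an illustration, not npf's actual helper) is:

#include <stdint.h>

/* RFC 1624: HC' = ~(~HC + ~m + m'), with the carries folded back. */
static uint16_t
cksum_fixup16(uint16_t cksum, uint16_t odatum, uint16_t ndatum)
{
	uint32_t sum;

	sum = (uint32_t)(~cksum & 0xffff);
	sum += (uint32_t)(~odatum & 0xffff) + ndatum;
	sum = (sum & 0xffff) + (sum >> 16);	/* fold the carry... */
	sum = (sum & 0xffff) + (sum >> 16);	/* ...twice, to be safe */
	return (uint16_t)(~sum & 0xffff);
}

For example, rewriting an IPv4 ID from 0x1234 to 0xabcd only requires adjusting ip_sum with cksum_fixup16(ip_sum, 0x1234, 0xabcd), which is exactly the pattern npf_normalize_ip4() uses further down.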
/*
* npf_fetch_ip4addrs: fetch source and destination address from IPv4 header.
*
* => Stores both source and destination addresses into the cache.
* npf_addr_sum: provide IP address as a summed (if needed) 32-bit integer.
* Note: used for hash function.
*/
bool
npf_fetch_ip4addrs(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
uint32_t
npf_addr_sum(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
{
in_addr_t *src = &npc->npc_srcip, *dst = &npc->npc_dstip;
u_int offby;
uint32_t mix = 0;
int i;
/* Source address. */
offby = offsetof(struct ip, ip_src);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(in_addr_t), src))
return false;
/* Destination address. */
offby = offsetof(struct ip, ip_dst) - offby;
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(in_addr_t), dst))
return false;
/* Both addresses are cached. */
npc->npc_info |= NPC_ADDRS;
return true;
for (i = 0; i < (sz >> 2); i++) {
mix += a1->s6_addr32[i];
mix += a2->s6_addr32[i];
}
return mix;
}
/*
* npf_fetch_ports: fetch ports from either TCP or UDP header.
*
* => Stores both source and destination ports into the cache.
* npf_tcpsaw: helper to fetch SEQ, ACK, WIN and return TCP data length.
* Returns all values in host byte-order.
*/
int
npf_tcpsaw(npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win)
{
struct ip *ip = &npc->npc_ip.v4;
struct tcphdr *th = &npc->npc_l4.tcp;
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_TCP));
*seq = ntohl(th->th_seq);
*ack = ntohl(th->th_ack);
*win = (uint32_t)ntohs(th->th_win);
return ntohs(ip->ip_len) - (ip->ip_hl << 2) - (th->th_off << 2);
}
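For example, a 1500-byte IPv4 packet with a 20-byte IP header (ip_hl = 5) and a 32-byte TCP header (th_off = 8) yields a data length of 1500 - 20 - 32 = 1448, the amount that gets added to the sequence number for the nst_seqend bookkeeping described in npf_impl.h.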
/*
* npf_fetch_tcpopts: parse and return TCP options.
*/
bool
npf_fetch_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int proto)
npf_fetch_tcpopts(const npf_cache_t *npc, nbuf_t *nbuf,
uint16_t *mss, int *wscale)
{
u_int dst_off;
void *n_ptr = nbuf_dataptr(nbuf);
const struct ip *ip = &npc->npc_ip.v4;
const struct tcphdr *th = &npc->npc_l4.tcp;
int topts_len, step;
uint16_t val16;
uint8_t val;
/* Perform checks, advance to TCP/UDP header. */
if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr))
return false;
n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
if (n_ptr == NULL || npc->npc_proto != proto)
return false;
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_TCP));
/*
* TCP/UDP header: fetch source and destination ports. For both
* protocols offset of the source port offset is 0.
*/
CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
if (proto == IPPROTO_TCP) {
dst_off = offsetof(struct tcphdr, th_dport);
} else {
KASSERT(proto == IPPROTO_UDP);
dst_off = offsetof(struct udphdr, uh_dport);
/* Determine if there are any TCP options, get their length. */
topts_len = (th->th_off << 2) - sizeof(struct tcphdr);
if (topts_len <= 0) {
/* No options. */
return false;
}
KASSERT(topts_len <= MAX_TCPOPTLEN);
/* First step: IP and TCP header up to options. */
step = (ip->ip_hl << 2) + sizeof(struct tcphdr);
next:
if (nbuf_advfetch(&nbuf, &n_ptr, step, sizeof(val), &val)) {
return false;
}
switch (val) {
case TCPOPT_EOL:
/* Done. */
return true;
case TCPOPT_NOP:
topts_len--;
step = 1;
break;
case TCPOPT_MAXSEG:
/*
* XXX: clean this mess.
*/
if (mss && *mss) {
val16 = *mss;
if (nbuf_advstore(&nbuf, &n_ptr, 2,
sizeof(val16), &val16))
return false;
} else if (nbuf_advfetch(&nbuf, &n_ptr, 2,
sizeof(val16), &val16)) {
return false;
}
if (mss) {
*mss = val16;
}
topts_len -= TCPOLEN_MAXSEG;
step = sizeof(val16);
break;
case TCPOPT_WINDOW:
if (nbuf_advfetch(&nbuf, &n_ptr, 2, sizeof(val), &val)) {
return false;
}
*wscale = (val > TCP_MAX_WINSHIFT) ? TCP_MAX_WINSHIFT : val;
topts_len -= TCPOLEN_WINDOW;
step = sizeof(val);
break;
default:
if (nbuf_advfetch(&nbuf, &n_ptr, 1, sizeof(val), &val)) {
return false;
}
if (val < 2 || val >= topts_len) {
return false;
}
topts_len -= val;
step = val - 1;
}
/* Soft limit, in a case of invalid packet. */
if (__predict_true(topts_len > 0)) {
goto next;
}
return false;
}
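Note that the mss argument doubles as input and output: when *mss is zero on entry the advertised MSS option is fetched into it, and when it is non-zero that value is written back into the packet. A hedged usage sketch (the 1440 limit is only an example, and checksum updates are left to the caller, as npf_normalize() further down shows):

	uint16_t mss = 0;
	int wscale = 0;

	/* Pass 1: *mss == 0, so the advertised MSS and wscale are read. */
	if (npf_fetch_tcpopts(npc, nbuf, &mss, &wscale) &&
	    ntohs(mss) > 1440) {
		/* Pass 2: *mss != 0, so this value is stored into the option. */
		mss = htons(1440);
		(void)npf_fetch_tcpopts(npc, nbuf, &mss, &wscale);
	}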
/*
* npf_fetch_ip: fetch, check and cache IP header.
*/
bool
npf_fetch_ip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
struct ip *ip;
uint8_t ver;
if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(uint8_t), &ver)) {
return false;
}
switch (ver >> 4) {
case IPVERSION:
/* IPv4 */
ip = &npc->npc_ip.v4;
/* Fetch the header. */
if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(struct ip), ip)) {
return false;
}
/* Check header length and fragment offset. */
if ((ip->ip_hl << 2) < sizeof(struct ip)) {
return false;
}
if (ip->ip_off & ~htons(IP_DF | IP_RF)) {
/* Note fragmentation. */
npc->npc_info |= NPC_IPFRAG;
}
/* Cache: layer 3 - IPv4. */
npc->npc_ipsz = sizeof(struct in_addr);
npc->npc_srcip = (npf_addr_t *)&ip->ip_src;
npc->npc_dstip = (npf_addr_t *)&ip->ip_dst;
npc->npc_info |= NPC_IP4;
break;
case (IPV6_VERSION >> 4):
/* TODO */
default:
return false;
}
return true;
}
bool
npf_fetch_tcp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
struct ip *ip = &npc->npc_ip.v4;
struct tcphdr *th;
u_int hlen;
/* Must have IP header processed for its length and protocol. */
if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
return false;
}
if (ip->ip_p != IPPROTO_TCP) {
return false;
}
hlen = ip->ip_hl << 2;
th = &npc->npc_l4.tcp;
/* Fetch TCP header. */
if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct tcphdr), th)) {
return false;
}
if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_port_t), &npc->npc_sport))
return false;
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, dst_off)) == NULL)
return false;
if (nbuf_fetch_datum(nbuf, n_ptr, sizeof(in_port_t), &npc->npc_dport))
return false;
/* Cache: layer 4 - TCP. */
npc->npc_info |= (NPC_LAYER4 | NPC_TCP);
return true;
}
/* Both ports are cached. */
npc->npc_info |= NPC_PORTS;
bool
npf_fetch_udp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
struct ip *ip = &npc->npc_ip.v4;
struct udphdr *uh;
u_int hlen;
/* Must have IP header processed for its length and protocol. */
if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
return false;
}
if (ip->ip_p != IPPROTO_UDP) {
return false;
}
hlen = ip->ip_hl << 2;
uh = &npc->npc_l4.udp;
/* Fetch UDP header. */
if (nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct udphdr), uh)) {
return false;
}
/* Cache: layer 4 - UDP. */
npc->npc_info |= (NPC_LAYER4 | NPC_UDP);
return true;
}
@ -208,179 +325,309 @@ npf_fetch_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int proto)
bool
npf_fetch_icmp(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
uint8_t *type = &npc->npc_icmp_type, *code = &npc->npc_icmp_code;
u_int offby;
struct ip *ip = &npc->npc_ip.v4;
struct icmp *ic;
u_int hlen, offby;
KASSERT(npf_iscached(npc, NPC_IP46));
/* ICMP type. */
offby = npc->npc_hlen;
CTASSERT(offsetof(struct icmp, icmp_type) == 0);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), type))
return false;
/* ICMP code. */
offby = offsetof(struct icmp, icmp_code);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), code))
return false;
/* Mark as cached. */
npc->npc_info |= NPC_ICMP;
return true;
}
/*
* npf_fetch_tcpfl: fetch TCP flags and store into the cache.
*/
bool
npf_fetch_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr)
{
const u_int offby = npc->npc_hlen + offsetof(struct tcphdr, th_flags);
uint8_t *tcpfl = &npc->npc_tcp_flags;
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), tcpfl)) {
/* Must have IP header processed for its length and protocol. */
if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
return false;
}
if (ip->ip_p != IPPROTO_ICMP) {
return false;
}
hlen = ip->ip_hl << 2;
ic = &npc->npc_l4.icmp;
/* Fetch basic ICMP header, up to the "data" point. */
offby = offsetof(struct icmp, icmp_data);
if (nbuf_advfetch(&nbuf, &n_ptr, hlen, offby, ic)) {
return false;
}
/* Cache: layer 4 - ICMP. */
npc->npc_info |= (NPC_LAYER4 | NPC_ICMP);
return true;
}
/*
* npf_cache_all: general routine to cache all relevant IPv4 and
* TCP, UDP or ICMP data.
* npf_cache_all: general routine to cache all relevant IP (v4 or v6)
* and TCP, UDP or ICMP data.
*/
bool
npf_cache_all(npf_cache_t *npc, nbuf_t *nbuf)
{
void *n_ptr = nbuf_dataptr(nbuf);
/* IPv4: get protocol, source and destination addresses. */
if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr)) {
if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
return false;
}
if (!npf_iscached(npc, NPC_ADDRS) &&
!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
return false;
if (npf_iscached(npc, NPC_IPFRAG)) {
return true;
}
switch (npc->npc_proto) {
switch (npf_cache_ipproto(npc)) {
case IPPROTO_TCP:
/* TCP flags. */
if (!npf_fetch_tcpfl(npc, nbuf, n_ptr)) {
return false;
}
/* FALLTHROUGH */
return npf_fetch_tcp(npc, nbuf, n_ptr);
case IPPROTO_UDP:
/* Fetch TCP/UDP ports. */
return npf_fetch_ports(npc, nbuf, n_ptr, npc->npc_proto);
return npf_fetch_udp(npc, nbuf, n_ptr);
case IPPROTO_ICMP:
/* Fetch ICMP data. */
return npf_fetch_icmp(npc, nbuf, n_ptr);
}
return false;
}
/*
* npf_rwrport: rewrite required TCP/UDP port and update checksum.
* npf_rwrip: rewrite required IP address, update the cache.
*/
bool
npf_rwrport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
in_port_t port, in_addr_t naddr)
npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
npf_addr_t *addr)
{
const int proto = npc->npc_proto;
u_int offby, toff;
in_addr_t oaddr;
in_port_t oport;
uint16_t cksum;
npf_addr_t *oaddr;
u_int offby;
KASSERT(npf_iscached(npc, NPC_PORTS));
KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
offby = npc->npc_hlen;
KASSERT(npf_iscached(npc, NPC_IP46));
if (di == PFIL_OUT) {
/* Offset to the source port is zero. */
CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
if (proto == IPPROTO_TCP) {
toff = offsetof(struct tcphdr, th_sum);
} else {
toff = offsetof(struct udphdr, uh_sum);
}
/* Rewrite source address, if outgoing. */
offby = offsetof(struct ip, ip_src);
oaddr = npc->npc_srcip;
oport = npc->npc_sport;
} else {
/* Calculate offset to destination port and checksum. */
u_int poff;
if (proto == IPPROTO_TCP) {
poff = offsetof(struct tcphdr, th_dport);
toff = offsetof(struct tcphdr, th_sum) - poff;
} else {
poff = offsetof(struct udphdr, uh_dport);
toff = offsetof(struct udphdr, uh_sum) - poff;
}
/* Rewrite destination, if incoming. */
offby = offsetof(struct ip, ip_dst);
oaddr = npc->npc_dstip;
oport = npc->npc_dport;
offby += poff;
}
/* Advance and rewrite port. */
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
return false;
if (nbuf_store_datum(nbuf, n_ptr, sizeof(in_port_t), &port))
/* Advance to the address and rewrite it. */
if (nbuf_advstore(&nbuf, &n_ptr, offby, npc->npc_ipsz, addr))
return false;
/* Advance and update TCP/UDP checksum. */
if (nbuf_advfetch(&nbuf, &n_ptr, toff, sizeof(uint16_t), &cksum)) {
return false;
}
if (__predict_true(cksum || proto == IPPROTO_TCP)) {
cksum = npf_fixup32_cksum(cksum, oaddr, naddr);
cksum = npf_fixup16_cksum(cksum, oport, port);
if (nbuf_store_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum))
return false;
}
/* Cache: IP address. */
memcpy(oaddr, addr, npc->npc_ipsz);
return true;
}
/*
* npf_rwrip: rewrite required IP address and update checksum.
* npf_rwrport: rewrite required TCP/UDP port, update the cache.
*/
bool
npf_rwrip(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
in_addr_t addr)
npf_rwrport(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
in_port_t port)
{
u_int offby;
in_addr_t oaddr;
const int proto = npf_cache_ipproto(npc);
struct ip *ip = &npc->npc_ip.v4;
u_int offby = ip->ip_hl << 2;
in_port_t *oport;
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
KASSERT(npf_iscached(npc, NPC_TCP) || npf_iscached(npc, NPC_UDP));
KASSERT(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
/* Advance to the checksum in IP header and fetch it. */
offby = offsetof(struct ip, ip_sum);
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
return false;
if (di == PFIL_OUT) {
/* Rewrite source address, if outgoing. */
offby = offsetof(struct ip, ip_src) - offby;
oaddr = npc->npc_srcip;
/* Offset to the port and pointer in the cache. */
if (proto == IPPROTO_TCP) {
struct tcphdr *th = &npc->npc_l4.tcp;
if (di == PFIL_OUT) {
CTASSERT(offsetof(struct tcphdr, th_sport) == 0);
oport = &th->th_sport;
} else {
offby += offsetof(struct tcphdr, th_dport);
oport = &th->th_dport;
}
} else {
/* Rewrite destination, if incoming. */
offby = offsetof(struct ip, ip_dst) - offby;
oaddr = npc->npc_dstip;
struct udphdr *uh = &npc->npc_l4.udp;
if (di == PFIL_OUT) {
CTASSERT(offsetof(struct udphdr, uh_sport) == 0);
oport = &uh->uh_sport;
} else {
offby += offsetof(struct udphdr, uh_dport);
oport = &uh->uh_dport;
}
}
/* Write new IP checksum (it is acceptable to do this earlier). */
uint16_t cksum = npf_fixup32_cksum(npc->npc_ipsum, oaddr, addr);
if (nbuf_store_datum(nbuf, n_ptr, sizeof(uint16_t), &cksum))
/* Advance and rewrite the port. */
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(in_port_t), &port))
return false;
/* Advance to address and rewrite it. */
if ((n_ptr = nbuf_advance(&nbuf, n_ptr, offby)) == NULL)
return false;
if (nbuf_store_datum(nbuf, n_ptr, sizeof(in_addr_t), &addr))
return false;
npc->npc_ipsum = cksum;
/* Cache: TCP/UDP port. */
*oport = port;
return true;
}
/*
* npf_rwrcksum: rewrite IPv4 and/or TCP/UDP checksum, update cache.
*/
bool
npf_rwrcksum(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const int di,
npf_addr_t *addr, in_port_t port)
{
const int proto = npf_cache_ipproto(npc);
npf_addr_t *oaddr;
in_port_t *oport;
uint16_t *cksum;
u_int offby;
/* Checksum update for IPv4 header. */
if (npf_iscached(npc, NPC_IP4)) {
struct ip *ip = &npc->npc_ip.v4;
uint16_t ipsum;
oaddr = (di == PFIL_OUT) ? npc->npc_srcip : npc->npc_dstip;
ipsum = npf_addr_cksum(ip->ip_sum, npc->npc_ipsz, oaddr, addr);
/* Advance to the IPv4 checksum and rewrite it. */
offby = offsetof(struct ip, ip_sum);
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(ipsum), &ipsum))
return false;
ip->ip_sum = ipsum;
offby = (ip->ip_hl << 2) - offby;
} else {
/* No checksum for IPv6. */
KASSERT(npf_iscached(npc, NPC_IP6));
KASSERT(false); /* XXX: Not yet supported. */
oaddr = NULL;
offby = 0;
}
/* Determine whether TCP/UDP checksum update is needed. */
if (port == 0) {
return true;
}
KASSERT(npf_iscached(npc, NPC_TCP | NPC_UDP));
/* Calculate TCP/UDP checksum. */
if (proto == IPPROTO_TCP) {
struct tcphdr *th = &npc->npc_l4.tcp;
cksum = &th->th_sum;
offby += offsetof(struct tcphdr, th_sum);
oport = (di == PFIL_OUT) ? &th->th_sport : &th->th_dport;
} else {
struct udphdr *uh = &npc->npc_l4.udp;
KASSERT(proto == IPPROTO_UDP);
cksum = &uh->uh_sum;
if (*cksum == 0) {
/* No need to update. */
return true;
}
offby += offsetof(struct udphdr, uh_sum);
oport = (di == PFIL_OUT) ? &uh->uh_sport : &uh->uh_dport;
}
*cksum = npf_addr_cksum(*cksum, npc->npc_ipsz, oaddr, addr);
*cksum = npf_fixup16_cksum(*cksum, *oport, port);
/* Advance to TCP/UDP checksum and rewrite it. */
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(uint16_t), cksum)) {
return false;
}
return true;
}
static inline bool
npf_normalize_ip4(npf_cache_t *npc, nbuf_t *nbuf, bool rnd, int minttl)
{
void *n_ptr = nbuf_dataptr(nbuf);
struct ip *ip = &npc->npc_ip.v4;
uint16_t cksum = ip->ip_sum;
uint8_t ttl = ip->ip_ttl;
u_int offby = 0;
KASSERT(rnd || minttl);
/* Randomize IPv4 ID. */
if (rnd) {
uint16_t oid = ip->ip_id, nid;
nid = htons(ip_randomid(ip_ids, 0));
offby = offsetof(struct ip, ip_id);
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(nid), &nid)) {
return false;
}
cksum = npf_fixup16_cksum(cksum, oid, nid);
ip->ip_id = nid;
}
/* Enforce minimum TTL. */
if (minttl && ttl < minttl) {
if (nbuf_advstore(&nbuf, &n_ptr,
offsetof(struct ip, ip_ttl) - offby,
sizeof(uint8_t), &minttl)) {
return false;
}
cksum = npf_fixup16_cksum(cksum, ttl, minttl);
ip->ip_ttl = minttl;
offby = offsetof(struct ip, ip_ttl);
}
/* Update IP checksum. */
offby = offsetof(struct ip, ip_sum) - offby;
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
return false;
}
ip->ip_sum = cksum;
return true;
}
bool
npf_normalize(npf_cache_t *npc, nbuf_t *nbuf,
bool rnd, u_int minttl, u_int maxmss)
{
void *n_ptr = nbuf_dataptr(nbuf);
struct ip *ip = &npc->npc_ip.v4;
struct tcphdr *th = &npc->npc_l4.tcp;
uint16_t cksum, mss;
int offby, wscale;
/* Normalize IPv4. */
if (npf_iscached(npc, NPC_IP4) && (rnd || minttl)) {
if (!npf_normalize_ip4(npc, nbuf, rnd, minttl)) {
return false;
}
}
/*
* TCP Maximum Segment Size (MSS) "clamping". Only if SYN packet.
*/
if (maxmss == 0 || !npf_iscached(npc, NPC_TCP) ||
(th->th_flags & TH_SYN) == 0) {
/* Not required; done. */
return true;
}
mss = 0;
if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
return false;
}
if (ntohs(mss) <= maxmss) {
return true;
}
if (!npf_iscached(npc, NPC_IP4)) { /* XXX: IPv6 */
return false;
}
/* Calculate checksums. */
maxmss = htons(maxmss);
cksum = npf_fixup16_cksum(th->th_sum, mss, maxmss);
ip->ip_sum = npf_fixup16_cksum(ip->ip_sum, mss, maxmss);
ip->ip_sum = npf_fixup16_cksum(ip->ip_sum, th->th_sum, cksum);
th->th_sum = cksum;
/* Rewrite MSS. */
mss = maxmss;
if (!npf_fetch_tcpopts(npc, nbuf, &mss, &wscale)) {
return false;
}
/* Update checksums. */
cksum = ip->ip_sum;
offby = offsetof(struct ip, ip_sum);
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
return false;
}
cksum = th->th_sum;
offby = (ip->ip_hl << 2) - offsetof(struct ip, ip_sum) +
offsetof(struct tcphdr, th_sum);
if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(cksum), &cksum)) {
return false;
}
return true;
}
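For example, with max-mss set to 1440, an incoming SYN advertising an MSS of 1460 has the option rewritten to 1440 and the TCP checksum adjusted incrementally, while a SYN already advertising 1440 or less is returned unchanged by the early ntohs(mss) <= maxmss test.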

File: npf_instr.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_instr.c,v 1.3 2010/09/25 00:25:31 rmind Exp $ */
/* $NetBSD: npf_instr.c,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.3 2010/09/25 00:25:31 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_instr.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
@ -98,15 +98,16 @@ int
npf_match_ip4table(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
const int sd, const u_int tid)
{
struct ip *ip = &npc->npc_ip.v4;
in_addr_t ip4addr;
if (!npf_iscached(npc, NPC_ADDRS)) {
if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
if (!npf_iscached(npc, NPC_IP46)) {
if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
return -1;
}
KASSERT(npf_iscached(npc, NPC_ADDRS));
KASSERT(npf_iscached(npc, NPC_IP46));
}
ip4addr = sd ? npc->npc_srcip : npc->npc_dstip;
ip4addr = sd ? ip->ip_src.s_addr : ip->ip_dst.s_addr;
/* Match address against NPF table. */
return npf_table_match_v4addr(tid, ip4addr);
@ -119,15 +120,16 @@ int
npf_match_ip4mask(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
const int sd, in_addr_t netaddr, in_addr_t subnet)
{
struct ip *ip = &npc->npc_ip.v4;
in_addr_t ip4addr;
if (!npf_iscached(npc, NPC_ADDRS)) {
if (!npf_fetch_ip4addrs(npc, nbuf, n_ptr)) {
if (!npf_iscached(npc, NPC_IP46)) {
if (!npf_fetch_ip(npc, nbuf, n_ptr)) {
return -1;
}
KASSERT(npf_iscached(npc, NPC_ADDRS));
KASSERT(npf_iscached(npc, NPC_IP46));
}
ip4addr = sd ? npc->npc_srcip : npc->npc_dstip;
ip4addr = sd ? ip->ip_src.s_addr : ip->ip_dst.s_addr;
return (ip4addr & subnet) == netaddr ? 0 : -1;
}
@ -139,15 +141,16 @@ int
npf_match_tcp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
const int sd, const uint32_t prange)
{
struct tcphdr *th = &npc->npc_l4.tcp;
in_port_t p;
if (!npf_iscached(npc, NPC_PORTS)) {
if (!npf_fetch_ports(npc, nbuf, n_ptr, IPPROTO_TCP)) {
if (!npf_iscached(npc, NPC_TCP)) {
if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
return -1;
}
KASSERT(npf_iscached(npc, NPC_PORTS));
KASSERT(npf_iscached(npc, NPC_TCP));
}
p = sd ? npc->npc_sport : npc->npc_dport;
p = sd ? th->th_sport : th->th_dport;
/* Match against the port range. */
return NPF_PORTRANGE_MATCH(prange, p) ? 0 : -1;
@ -160,15 +163,16 @@ int
npf_match_udp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
const int sd, const uint32_t prange)
{
struct udphdr *uh = &npc->npc_l4.udp;
in_port_t p;
if (!npf_iscached(npc, NPC_PORTS)) {
if (!npf_fetch_ports(npc, nbuf, n_ptr, IPPROTO_UDP)) {
if (!npf_iscached(npc, NPC_UDP)) {
if (!npf_fetch_udp(npc, nbuf, n_ptr)) {
return -1;
}
KASSERT(npf_iscached(npc, NPC_PORTS));
KASSERT(npf_iscached(npc, NPC_UDP));
}
p = sd ? npc->npc_sport : npc->npc_dport;
p = sd ? uh->uh_sport : uh->uh_dport;
/* Match against the port range. */
return NPF_PORTRANGE_MATCH(prange, p) ? 0 : -1;
@ -178,34 +182,27 @@ npf_match_udp_ports(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr,
* npf_match_icmp4: match ICMPv4 packet.
*/
int
npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t tc)
npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t tc)
{
struct icmp *ic = &npc->npc_l4.icmp;
if (!npf_iscached(npc, NPC_ICMP)) {
/* Perform checks, advance to ICMP header. */
if (!npf_iscached(npc, NPC_IP46) &&
!npf_ip4_proto(npc, nbuf, n_ptr)) {
return -1;
}
n_ptr = nbuf_advance(&nbuf, n_ptr, npc->npc_hlen);
if (n_ptr == NULL || npc->npc_proto != IPPROTO_ICMP) {
return -1;
}
if (!npf_fetch_icmp(npc, nbuf, n_ptr)) {
return -1;
}
KASSERT(npf_iscached(npc, NPC_ICMP));
}
/* Match code/type, if required. */
if ((1 << 31) & tc) {
const uint8_t type = (tc >> 8) & 0xff;
if (type != npc->npc_icmp_type) {
if (type != ic->icmp_type) {
return -1;
}
}
if ((1 << 30) & tc) {
const uint8_t code = tc & 0xff;
if (code != npc->npc_icmp_code) {
if (code != ic->icmp_code) {
return -1;
}
}
@ -216,15 +213,16 @@ npf_match_icmp4(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t tc)
* npf_match_tcpfl: match TCP flags.
*/
int
npf_match_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, const uint32_t fl)
npf_match_tcpfl(npf_cache_t *npc, nbuf_t *nbuf, void *n_ptr, uint32_t fl)
{
const uint8_t tcpfl = (fl >> 8) & 0xff, mask = fl & 0xff;
struct tcphdr *th = &npc->npc_l4.tcp;
if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr)) {
return -1;
if (!npf_iscached(npc, NPC_TCP)) {
if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
return -1;
}
KASSERT(npf_iscached(npc, NPC_TCP));
}
if (!npf_fetch_tcpfl(npc, nbuf, n_ptr)) {
return -1;
}
return ((npc->npc_tcp_flags & mask) == tcpfl) ? 0 : -1;
return ((th->th_flags & mask) == tcpfl) ? 0 : -1;
}

File: npf_mbuf.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_mbuf.c,v 1.4 2010/10/03 19:30:22 rmind Exp $ */
/* $NetBSD: npf_mbuf.c,v 1.5 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -36,10 +36,8 @@
* abstracted within this source.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.4 2010/10/03 19:30:22 rmind Exp $");
#endif
__KERNEL_RCSID(0, "$NetBSD: npf_mbuf.c,v 1.5 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/mbuf.h>
@ -210,6 +208,29 @@ nbuf_advfetch(nbuf_t **nbuf, void **n_ptr, u_int n, size_t len, void *buf)
return error;
}
/*
* nbuf_advstore: advance and store the datum.
*/
int
nbuf_advstore(nbuf_t **nbuf, void **n_ptr, u_int n, size_t len, void *buf)
{
nbuf_t *orig_nbuf = *nbuf;
void *orig_nptr = *n_ptr;
int error;
*n_ptr = nbuf_advance(nbuf, *n_ptr, n);
if (__predict_true(*n_ptr != NULL)) {
error = nbuf_store_datum(*nbuf, *n_ptr, len, buf);
} else {
error = EINVAL;
}
if (__predict_false(error)) {
*nbuf = orig_nbuf;
*n_ptr = orig_nptr;
}
return error;
}
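A hedged usage sketch of the new helper - the offset and value are only an example, and the surrounding declarations are assumed. The point is the rollback above: on failure the caller's nbuf/pointer pair is left exactly where it was.

	nbuf_t *nb = nbuf;
	void *p = nbuf_dataptr(nb);
	uint8_t ttl = 64;

	/* Advance to ip_ttl and overwrite it in a single call. */
	if (nbuf_advstore(&nb, &p, offsetof(struct ip, ip_ttl),
	    sizeof(ttl), &ttl)) {
		return EINVAL;	/* nb and p still reference the original spot */
	}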
/*
* nbuf_add_tag: add a tag to specified network buffer.
*

File: npf_nat.c

@ -1,4 +1,4 @@
/* $NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_nat.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@ -75,13 +75,11 @@
* "NAT" session expires.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#endif
#include <sys/atomic.h>
#include <sys/bitops.h>
@ -112,16 +110,17 @@ struct npf_natpolicy {
LIST_ENTRY(npf_natpolicy) n_entry;
int n_type;
int n_flags;
in_addr_t n_taddr;
in_port_t n_tport;
npf_portmap_t * n_portmap;
size_t n_addr_sz;
npf_addr_t n_taddr;
in_port_t n_tport;
};
/* NAT translation entry for a session. */
struct npf_nat {
npf_natpolicy_t * nt_natpolicy;
/* Original address and port (for backwards translation). */
in_addr_t nt_oaddr;
npf_addr_t nt_oaddr;
in_port_t nt_oport;
/* Translation port (for redirects). */
in_port_t nt_tport;
@ -166,7 +165,8 @@ npf_nat_sysfini(void)
* => XXX: serialise at upper layer.
*/
npf_natpolicy_t *
npf_nat_newpolicy(int type, int flags, in_addr_t taddr, in_port_t tport)
npf_nat_newpolicy(int type, int flags, const npf_addr_t *taddr,
size_t addr_sz, in_port_t tport)
{
npf_natpolicy_t *np, *it;
npf_portmap_t *pm;
@ -178,7 +178,8 @@ npf_nat_newpolicy(int type, int flags, in_addr_t taddr, in_port_t tport)
KASSERT(type == NPF_NATIN || type == NPF_NATOUT);
np->n_type = type;
np->n_flags = flags;
np->n_taddr = taddr;
np->n_addr_sz = addr_sz;
memcpy(&np->n_taddr, taddr, sizeof(npf_addr_t));
np->n_tport = tport;
pm = NULL;
@ -188,8 +189,9 @@ npf_nat_newpolicy(int type, int flags, in_addr_t taddr, in_port_t tport)
/* Search for a NAT policy using the same translation address. */
LIST_FOREACH(it, &nat_policy_list, n_entry) {
if (it->n_taddr != np->n_taddr)
if (memcmp(&it->n_taddr, &np->n_taddr, sizeof(npf_addr_t))) {
continue;
}
pm = it->n_portmap;
break;
}
@ -247,9 +249,8 @@ npf_nat_reload(npf_ruleset_t *nset)
npf_ruleset_t *oldnset;
oldnset = atomic_swap_ptr(&nat_ruleset, nset);
if (oldnset) {
npf_ruleset_destroy(oldnset);
}
KASSERT(oldnset != NULL);
npf_ruleset_destroy(oldnset);
}
/*
@ -329,9 +330,11 @@ npf_nat_inspect(npf_cache_t *npc, nbuf_t *nbuf, struct ifnet *ifp, const int di)
static npf_nat_t *
npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
{
const int proto = npc->npc_proto;
const int proto = npf_cache_ipproto(npc);
npf_nat_t *nt;
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
/* New NAT association. */
nt = pool_cache_get(nat_cache, PR_NOWAIT);
if (nt == NULL){
@ -343,11 +346,11 @@ npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
/* Save the original address which may be rewritten. */
if (np->n_type == NPF_NATOUT) {
/* Source (local) for Outbound NAT. */
nt->nt_oaddr = npc->npc_srcip;
memcpy(&nt->nt_oaddr, npc->npc_srcip, npc->npc_ipsz);
} else {
/* Destination (external) for Inbound NAT. */
KASSERT(np->n_type == NPF_NATIN);
nt->nt_oaddr = npc->npc_dstip;
memcpy(&nt->nt_oaddr, npc->npc_dstip, npc->npc_ipsz);
}
/*
@ -359,13 +362,17 @@ npf_nat_create(npf_cache_t *npc, npf_natpolicy_t *np)
nt->nt_tport = 0;
return nt;
}
/* Save a relevant TCP/UDP port. */
KASSERT(npf_iscached(npc, NPC_PORTS));
if (np->n_type == NPF_NATOUT) {
nt->nt_oport = npc->npc_sport;
/* Save the relevant TCP/UDP port. */
if (proto == IPPROTO_TCP) {
struct tcphdr *th = &npc->npc_l4.tcp;
nt->nt_oport = (np->n_type == NPF_NATOUT) ?
th->th_sport : th->th_dport;
} else {
nt->nt_oport = npc->npc_dport;
struct udphdr *uh = &npc->npc_l4.udp;
nt->nt_oport = (np->n_type == NPF_NATOUT) ?
uh->uh_sport : uh->uh_dport;
}
/* Get a new port for translation. */
if ((np->n_flags & NPF_NAT_PORTMAP) != 0) {
nt->nt_tport = npf_nat_getport(np);
@ -382,12 +389,12 @@ static int
npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
const bool forw, const int di)
{
const npf_natpolicy_t *np = nt->nt_natpolicy;
void *n_ptr = nbuf_dataptr(nbuf);
in_addr_t addr;
npf_natpolicy_t *np = nt->nt_natpolicy;
npf_addr_t *addr;
in_port_t port;
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
KASSERT(npf_iscached(npc, NPC_IP46));
if (forw) {
/* "Forwards" stream: use translation address/port. */
@ -395,7 +402,7 @@ npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
(np->n_type == NPF_NATIN && di == PFIL_IN) ^
(np->n_type == NPF_NATOUT && di == PFIL_OUT)
);
addr = np->n_taddr;
addr = &np->n_taddr;
port = nt->nt_tport;
} else {
/* "Backwards" stream: use original address/port. */
@ -403,53 +410,47 @@ npf_nat_translate(npf_cache_t *npc, nbuf_t *nbuf, npf_nat_t *nt,
(np->n_type == NPF_NATIN && di == PFIL_OUT) ^
(np->n_type == NPF_NATOUT && di == PFIL_IN)
);
addr = nt->nt_oaddr;
addr = &nt->nt_oaddr;
port = nt->nt_oport;
}
/* Execute ALG hooks first. */
/* Execute ALG hook first. */
npf_alg_exec(npc, nbuf, nt, di);
/*
* Rewrite IP and/or TCP/UDP checksums first, since it will use
* the cache containing original values for checksum calculation.
*/
if (!npf_rwrcksum(npc, nbuf, n_ptr, di, addr, port)) {
return EINVAL;
}
/*
* Address translation: rewrite source/destination address, depending
* on direction (PFIL_OUT - for source, PFIL_IN - for destination).
* Note: cache will be used in npf_rwrport(), update only in the end.
*/
if (!npf_rwrip(npc, nbuf, n_ptr, di, addr)) {
return EINVAL;
}
if ((np->n_flags & NPF_NAT_PORTS) == 0) {
/* Cache new address. */
if (di == PFIL_OUT) {
npc->npc_srcip = addr;
} else {
npc->npc_dstip = addr;
}
/* Done. */
return 0;
}
switch (npc->npc_proto) {
switch (npf_cache_ipproto(npc)) {
case IPPROTO_TCP:
case IPPROTO_UDP:
KASSERT(npf_iscached(npc, NPC_PORTS));
KASSERT(npf_iscached(npc, NPC_TCP | NPC_UDP));
/* Rewrite source/destination port. */
if (!npf_rwrport(npc, nbuf, n_ptr, di, port, addr)) {
if (!npf_rwrport(npc, nbuf, n_ptr, di, port)) {
return EINVAL;
}
break;
case IPPROTO_ICMP:
/* None. */
KASSERT(npf_iscached(npc, NPC_ICMP));
/* Nothing. */
break;
default:
return ENOTSUP;
}
/* Cache new address and port. */
if (di == PFIL_OUT) {
npc->npc_srcip = addr;
npc->npc_sport = port;
} else {
npc->npc_dstip = addr;
npc->npc_dport = port;
}
return 0;
}
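The ordering above matters because the checksum rewrite consumes the original values still held in the cache. For reference, the standard incremental checksum update from RFC 1624 (equation 3) for a single 16-bit field looks like the sketch below; this only illustrates the general technique, it is not the npf_rwrcksum() implementation, and the hypothetical example_cksum_fixup16 expects the old and new values in the same (network) byte order.

    static uint16_t
    example_cksum_fixup16(uint16_t cksum, uint16_t odatum, uint16_t ndatum)
    {
    	uint32_t sum;

    	/* HC' = ~(~HC + ~m + m'), folded back into 16 bits. */
    	sum = (uint32_t)(uint16_t)~cksum;
    	sum += (uint32_t)(uint16_t)~odatum + ndatum;
    	sum = (sum >> 16) + (sum & 0xffff);
    	sum += (sum >> 16);
    	return (uint16_t)~sum;
    }

Rewriting a source port, for example, would apply this once to the TCP/UDP checksum with the old and new port values; rewriting an address applies it once per 16-bit word of the address, to both the IP and the transport checksums.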
@ -473,15 +474,13 @@ npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
bool forw, new;
/* All relevant IPv4 data should be already cached. */
if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS)) {
if (!npf_iscached(npc, NPC_IP46) || !npf_iscached(npc, NPC_LAYER4)) {
return 0;
}
/*
* Return the NAT entry associated with the session, if any.
* Assumptions:
* - If associated via linked session, then "forwards" stream.
* - If associated directly, then "backwards" stream.
* Determines whether the stream is "forwards" or "backwards".
*/
if (se && (nt = npf_session_retnat(se, di, &forw)) != NULL) {
np = nt->nt_natpolicy;
@ -504,6 +503,11 @@ npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
}
new = true;
/* Determine whether any ALG matches. */
if (npf_alg_match(npc, nbuf, nt)) {
KASSERT(nt->nt_alg != NULL);
}
/*
* If there is no local session (no "keep state" rule - unusual, but
* possible configuration), establish one before translation. Note
@ -511,7 +515,7 @@ npf_do_nat(npf_cache_t *npc, npf_session_t *se, nbuf_t *nbuf,
* stream depends on other, stateless filtering rules.
*/
if (se == NULL) {
nse = npf_session_establish(npc, NULL, di);
nse = npf_session_establish(npc, nbuf, NULL, di);
if (nse == NULL) {
error = ENOMEM;
goto out;
@ -533,7 +537,7 @@ translate:
*
* Note: packet now has a translated address in the cache.
*/
natse = npf_session_establish(npc, nt, di);
natse = npf_session_establish(npc, nbuf, nt, di);
if (natse == NULL) {
error = ENOMEM;
goto out;
@ -562,13 +566,16 @@ out:
* npf_nat_getorig: return original IP address and port from translation entry.
*/
void
npf_nat_getorig(npf_nat_t *nt, in_addr_t *addr, in_port_t *port)
npf_nat_getorig(npf_nat_t *nt, npf_addr_t **addr, in_port_t *port)
{
*addr = nt->nt_oaddr;
*addr = &nt->nt_oaddr;
*port = nt->nt_oport;
}
/*
* npf_nat_setalg: associate an ALG with the NAT entry.
*/
void
npf_nat_setalg(npf_nat_t *nt, npf_alg_t *alg, uintptr_t arg)
{
@ -606,13 +613,13 @@ npf_nat_dump(npf_nat_t *nt)
}
LIST_FOREACH(np, &nat_policy_list, n_entry) {
skip:
ip.s_addr = np->n_taddr;
printf("\tNAT policy: type = %d, flags = %d, taddr = %s\n",
np->n_type, np->n_flags, inet_ntoa(ip));
memcpy(&ip, &np->n_taddr, sizeof(ip));
printf("\tNAT policy: type %d, flags 0x%x, taddr %s, tport = %d\n",
np->n_type, np->n_flags, inet_ntoa(ip), np->n_tport);
if (nt == NULL) {
continue;
}
ip.s_addr = nt->nt_oaddr;
memcpy(&ip, &nt->nt_oaddr, sizeof(ip));
printf("\tNAT: original address %s, oport %d, tport = %d\n",
inet_ntoa(ip), ntohs(nt->nt_oport), ntohs(nt->nt_tport));
if (nt->nt_alg) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_ncode.h,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_ncode.h,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -41,12 +41,17 @@
#include "npf.h"
/* N-code processing, validation & building. */
#if defined(_KERNEL) || defined(_NPF_TESTING)
/*
* N-code processing, validation & building.
*/
void * npf_ncode_alloc(size_t);
void npf_ncode_free(void *, size_t);
int npf_ncode_process(npf_cache_t *, const void *, nbuf_t *, const int);
int npf_ncode_validate(const void *, size_t, int *);
void * npf_ncode_alloc(size_t);
void npf_ncode_free(void *, size_t);
#endif
/* Error codes. */
#define NPF_ERR_OPCODE -1 /* Invalid instruction. */

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_processor.c,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_processor.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -53,10 +53,8 @@
* maintenance in npf_ncode_process() and nc_insn_check() might be avoided.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
#endif
__KERNEL_RCSID(0, "$NetBSD: npf_processor.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_ruleset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -39,11 +39,10 @@
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#endif
#include <sys/atomic.h>
#include <sys/kmem.h>
@ -52,22 +51,23 @@ __KERNEL_RCSID(0, "$NetBSD: npf_ruleset.c,v 1.2 2010/09/16 04:53:27 rmind Exp $"
#include <sys/rwlock.h>
#include <sys/types.h>
#include <net/if.h>
#include <net/pfil.h>
#include <net/if.h>
#endif
#include "npf_ncode.h"
#include "npf_impl.h"
struct npf_hook {
void (*hk_fn)(const npf_cache_t *, void *);
void * hk_arg;
LIST_ENTRY(npf_hook) hk_entry;
void (*hk_fn)(npf_cache_t *, nbuf_t *, void *);
void * hk_arg;
LIST_ENTRY(npf_hook) hk_entry;
};
struct npf_ruleset {
TAILQ_HEAD(, npf_rule) rs_queue;
npf_rule_t * rs_default;
int _reserved;
TAILQ_HEAD(, npf_rule) rs_queue;
npf_rule_t * rs_default;
int _reserved;
};
/* Rule structure. */
@ -83,11 +83,15 @@ struct npf_rule {
void * r_ncode;
size_t r_nc_size;
/* Attributes of this rule. */
int r_attr;
uint32_t r_attr;
/* Interface. */
u_int r_ifid;
/* Hit counter. */
u_long r_hitcount;
/* Normalization options (XXX - abstract). */
bool rl_rnd_ipid;
u_int rl_minttl;
u_int rl_maxmss;
/* List of hooks to process on match. */
LIST_HEAD(, npf_hook) r_hooks;
};
@ -187,6 +191,7 @@ npf_ruleset_reload(npf_ruleset_t *nrlset, npf_tableset_t *ntblset)
*/
rw_enter(&ruleset_lock, RW_WRITER);
oldrlset = atomic_swap_ptr(&ruleset, nrlset);
KASSERT(oldrlset != NULL);
/*
* Setup a new tableset. It will lock the global tableset lock,
@ -205,7 +210,8 @@ npf_ruleset_reload(npf_ruleset_t *nrlset, npf_tableset_t *ntblset)
* npf_rule_alloc: allocate a rule and copy ncode from user-space.
*/
npf_rule_t *
npf_rule_alloc(int attr, pri_t pri, int ifidx, void *nc, size_t sz)
npf_rule_alloc(int attr, pri_t pri, int ifidx, void *nc, size_t sz,
bool rnd_ipid, int minttl, int maxmss)
{
npf_rule_t *rl;
int errat;
@ -228,6 +234,11 @@ npf_rule_alloc(int attr, pri_t pri, int ifidx, void *nc, size_t sz)
rl->r_nc_size = sz;
rl->r_hitcount = 0;
rl->r_nat = NULL;
rl->rl_rnd_ipid = rnd_ipid;
rl->rl_minttl = minttl;
rl->rl_maxmss = maxmss;
return rl;
}
@ -296,6 +307,8 @@ npf_rule_getnat(const npf_rule_t *rl)
void
npf_rule_setnat(npf_rule_t *rl, npf_natpolicy_t *np)
{
KASSERT(rl->r_nat == NULL);
rl->r_nat = np;
}
@ -304,7 +317,7 @@ npf_rule_setnat(npf_rule_t *rl, npf_natpolicy_t *np)
*/
npf_hook_t *
npf_hook_register(npf_rule_t *rl,
void (*fn)(const npf_cache_t *, void *), void *arg)
void (*fn)(npf_cache_t *, nbuf_t *, void *), void *arg)
{
npf_hook_t *hk;
@ -420,7 +433,7 @@ reinspect:
* => Releases the ruleset lock.
*/
int
npf_rule_apply(const npf_cache_t *npc, npf_rule_t *rl,
npf_rule_apply(npf_cache_t *npc, nbuf_t *nbuf, npf_rule_t *rl,
bool *keepstate, int *retfl)
{
npf_hook_t *hk;
@ -443,11 +456,17 @@ npf_rule_apply(const npf_cache_t *npc, npf_rule_t *rl,
/* Passing. Run the hooks. */
LIST_FOREACH(hk, &rl->r_hooks, hk_entry) {
KASSERT(hk->hk_fn != NULL);
(*hk->hk_fn)(npc, hk->hk_arg);
(*hk->hk_fn)(npc, nbuf, hk->hk_arg);
}
/* Normalize the packet, if required. */
if (rl->r_attr & NPF_RULE_NORMALIZE) {
(void)npf_normalize(npc, nbuf,
rl->rl_rnd_ipid, rl->rl_minttl, rl->rl_maxmss);
}
*keepstate = (rl->r_attr & NPF_RULE_KEEPSTATE) != 0;
rw_exit(&ruleset_lock);
return 0;
}
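The normalization pass driven by these per-rule options (IPv4 ID randomization, minimum TTL, MSS "clamping") is implemented by npf_normalize(). As a rough illustration of what MSS clamping means, the hypothetical helper below walks the TCP options of a SYN segment held in a contiguous header and lowers an oversized MSS value; it uses only the standard <netinet/tcp.h> option constants and leaves the checksum adjustment to an RFC 1624 style update as sketched earlier.

    static bool
    example_clamp_mss(struct tcphdr *th, u_int maxmss)
    {
    	uint8_t *opt = (uint8_t *)(th + 1);
    	int optlen = (th->th_off << 2) - sizeof(struct tcphdr);
    	uint16_t mss, nmss;

    	while (optlen >= 4) {
    		if (opt[0] == TCPOPT_MAXSEG && opt[1] == TCPOLEN_MAXSEG) {
    			memcpy(&mss, opt + 2, sizeof(mss));
    			if (ntohs(mss) <= maxmss) {
    				return false;
    			}
    			nmss = htons(maxmss);
    			memcpy(opt + 2, &nmss, sizeof(nmss));
    			/* th->th_sum must be fixed up for mss -> nmss. */
    			return true;
    		}
    		if (opt[0] == TCPOPT_EOL) {
    			break;
    		}
    		if (opt[0] == TCPOPT_NOP) {
    			opt++;
    			optlen--;
    		} else {
    			if (opt[1] < 2) {
    				break;
    			}
    			opt += opt[1];
    			optlen -= opt[1];
    		}
    	}
    	return false;
    }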

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_sendpkt.c,v 1.2 2010/09/25 00:25:31 rmind Exp $ */
/* $NetBSD: npf_sendpkt.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.2 2010/09/25 00:25:31 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
@ -53,43 +53,6 @@ __KERNEL_RCSID(0, "$NetBSD: npf_sendpkt.c,v 1.2 2010/09/25 00:25:31 rmind Exp $"
#define DEFAULT_IP_TTL (ip_defttl)
/*
* npf_fetch_seqack: fetch TCP data length, SEQ and ACK numbers.
*
* NOTE: Returns in host byte-order.
*/
static inline bool
npf_fetch_seqack(nbuf_t *nbuf, npf_cache_t *npc,
tcp_seq *seq, tcp_seq *ack, size_t *tcpdlen)
{
void *n_ptr = nbuf_dataptr(nbuf);
u_int offby;
tcp_seq seqack[2];
uint16_t iplen;
uint8_t toff;
/* Fetch total length of IP. */
offby = offsetof(struct ip, ip_len);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint16_t), &iplen))
return false;
/* Fetch SEQ and ACK numbers. */
offby = (npc->npc_hlen - offby) + offsetof(struct tcphdr, th_seq);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(seqack), seqack))
return false;
/* Fetch TCP data offset (header length) value. */
offby = sizeof(seqack);
if (nbuf_advfetch(&nbuf, &n_ptr, offby, sizeof(uint8_t), &toff))
return false;
toff >>= 4;
*seq = ntohl(seqack[0]);
*ack = ntohl(seqack[1]);
*tcpdlen = ntohs(iplen) - npc->npc_hlen - (toff << 2);
return true;
}
/*
* npf_return_tcp: return a TCP reset (RST) packet.
*/
@ -97,17 +60,19 @@ static int
npf_return_tcp(npf_cache_t *npc, nbuf_t *nbuf)
{
struct mbuf *m;
struct ip *ip;
struct tcphdr *th;
struct ip *oip, *ip;
struct tcphdr *oth, *th;
tcp_seq seq, ack;
size_t tcpdlen, len;
int tcpdlen, len;
uint32_t win;
/* Fetch relevant data. */
if (!npf_iscached(npc, NPC_IP46 | NPC_ADDRS | NPC_PORTS) ||
!npf_fetch_seqack(nbuf, npc, &seq, &ack, &tcpdlen)) {
return EBADMSG;
}
if (npc->npc_tcp_flags & TH_RST) {
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
tcpdlen = npf_tcpsaw(npc, &seq, &ack, &win);
oip = &npc->npc_ip.v4;
oth = &npc->npc_l4.tcp;
if (oth->th_flags & TH_RST) {
return 0;
}
@ -129,16 +94,16 @@ npf_return_tcp(npf_cache_t *npc, nbuf_t *nbuf)
* Note: IP length contains TCP header length.
*/
ip->ip_p = IPPROTO_TCP;
ip->ip_src.s_addr = npc->npc_dstip;
ip->ip_dst.s_addr = npc->npc_srcip;
ip->ip_src.s_addr = oip->ip_dst.s_addr;
ip->ip_dst.s_addr = oip->ip_src.s_addr;
ip->ip_len = htons(sizeof(struct tcphdr));
/* Construct TCP header and compute the checksum. */
th = (struct tcphdr *)(ip + 1);
th->th_sport = npc->npc_dport;
th->th_dport = npc->npc_sport;
th->th_sport = oth->th_dport;
th->th_dport = oth->th_sport;
th->th_seq = htonl(ack);
if (npc->npc_tcp_flags & TH_SYN) {
if (oth->th_flags & TH_SYN) {
tcpdlen++;
}
th->th_ack = htonl(seq + tcpdlen);
@ -151,7 +116,6 @@ npf_return_tcp(npf_cache_t *npc, nbuf_t *nbuf)
ip->ip_hl = sizeof(struct ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
ip->ip_len = htons(len);
ip->ip_off = htons(IP_DF);
ip->ip_ttl = DEFAULT_IP_TTL;
/* Pass to IP layer. */
@ -177,20 +141,23 @@ void
npf_return_block(npf_cache_t *npc, nbuf_t *nbuf, const int retfl)
{
void *n_ptr = nbuf_dataptr(nbuf);
const int proto = npc->npc_proto;
if (!npf_iscached(npc, NPC_IP46) && !npf_ip4_proto(npc, nbuf, n_ptr))
return;
if ((proto == IPPROTO_TCP && (retfl & NPF_RULE_RETRST) == 0) ||
(proto == IPPROTO_UDP && (retfl & NPF_RULE_RETICMP) == 0)) {
if (!npf_iscached(npc, NPC_IP46) && !npf_fetch_ip(npc, nbuf, n_ptr)) {
return;
}
switch (proto) {
switch (npf_cache_ipproto(npc)) {
case IPPROTO_TCP:
(void)npf_return_tcp(npc, nbuf);
if (retfl & NPF_RULE_RETRST) {
if (!npf_fetch_tcp(npc, nbuf, n_ptr)) {
return;
}
(void)npf_return_tcp(npc, nbuf);
}
break;
case IPPROTO_UDP:
(void)npf_return_icmp(nbuf);
if (retfl & NPF_RULE_RETICMP) {
(void)npf_return_icmp(nbuf);
}
break;
}
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_session.c,v 1.4 2010/10/03 19:36:38 rmind Exp $ */
/* $NetBSD: npf_session.c,v 1.5 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@ -65,8 +65,10 @@
*
* Often NAT policies have overlapping stateful filtering rules. In
* order to avoid unnecessary lookups, "pass" session can be linked
* with a "NAT" session (npf_session_t::s_nat_se pointer). Such link
* is used to detect translation on "forwards" stream.
* with a "NAT" session (npf_session_t::s_linked pointer). Such link
* is used to detect translation on "forwards" stream. "NAT" session
* also contains the link back to the "pass" session, therefore, both
* sessions point to each other.
*
* Additional reference is held on linked "NAT" sessions to prevent
* them from destruction while linked. Link is broken and reference
@ -83,16 +85,15 @@
* - Session monitoring via descriptor.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.4 2010/10/03 19:36:38 rmind Exp $");
__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.5 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#endif
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/hash.h>
@ -118,14 +119,15 @@ struct npf_session {
/* Session type. Supported: TCP, UDP, ICMP. */
int s_type;
int s_direction;
uint16_t s_state;
uint16_t s_flags;
/* NAT data associated with this session (if any). */
int s_flags;
npf_state_t s_state;
/* NAT associated with this session (if any) and link. */
npf_nat_t * s_nat;
npf_session_t * s_nat_se;
npf_session_t * s_linked;
/* Source and destination addresses. */
in_addr_t s_src_addr;
in_addr_t s_dst_addr;
npf_addr_t s_src_addr;
npf_addr_t s_dst_addr;
int s_addr_sz;
/* Source and destination ports (TCP / UDP) or generic IDs. */
union {
in_port_t port;
@ -139,6 +141,8 @@ struct npf_session {
struct timespec s_atime;
};
#define SE_PASSSING 0x01
LIST_HEAD(npf_sesslist, npf_session);
#define SESS_HASH_BUCKETS 1024 /* XXX tune + make tunable */
@ -162,28 +166,13 @@ static lwp_t * sess_gc_lwp;
#define SESS_GC_INTERVAL 5 /* 5 sec */
/* Session expiration table. XXX: TCP close: 2 * tcp_msl (e.g. 120)? Maybe. */
static const u_int sess_expire_table[ ] = {
[IPPROTO_TCP] = 600, /* 10 min */
[IPPROTO_UDP] = 300, /* 5 min */
[IPPROTO_ICMP] = 30 /* 1 min */
};
/* Session states and flags. */
#define SE_OPENING 1
#define SE_ACKNOWLEDGE 2
#define SE_ESTABLISHED 3
#define SE_CLOSING 4
#define SE_PASSSING 0x01
static void sess_tracking_stop(void);
static void npf_session_worker(void *);
#ifdef SE_DEBUG
#define DPRINTF(x) printf x
#define SEPRINTF(x) printf x
#else
#define DPRINTF(x)
#define SEPRINTF(x)
#endif
/*
@ -229,39 +218,32 @@ sess_rbtree_cmp_nodes(void *ctx, const void *n1, const void *n2)
{
const npf_session_t * const se1 = n1;
const npf_session_t * const se2 = n2;
const npf_addr_t *se2_addr1, *se2_addr2;
uint32_t se2_id1, se2_id2;
int ret;
/*
* Note: must compare equivalent streams.
* See sess_rbtree_cmp_key() below.
*/
if (se1->s_direction == se2->s_direction) {
/*
* Direction "forwards".
*/
if (se1->s_src.id != se2->s_src.id)
return (se1->s_src.id < se2->s_src.id) ? -1 : 1;
if (se1->s_dst.id != se2->s_dst.id)
return (se1->s_dst.id < se2->s_dst.id) ? -1 : 1;
if (__predict_false(se1->s_src_addr != se2->s_src_addr))
return (se1->s_src_addr < se2->s_src_addr) ? -1 : 1;
if (__predict_false(se1->s_dst_addr != se2->s_dst_addr))
return (se1->s_dst_addr < se2->s_dst_addr) ? -1 : 1;
/* Direction "forwards". */
se2_id1 = se2->s_src.id; se2_addr1 = &se2->s_src_addr;
se2_id2 = se2->s_dst.id; se2_addr2 = &se2->s_dst_addr;
} else {
/*
* Direction "backwards".
*/
if (se1->s_src.id != se2->s_dst.id)
return (se1->s_src.id < se2->s_dst.id) ? -1 : 1;
if (se1->s_dst.id != se2->s_src.id)
return (se1->s_dst.id < se2->s_src.id) ? -1 : 1;
if (__predict_false(se1->s_src_addr != se2->s_dst_addr))
return (se1->s_src_addr < se2->s_dst_addr) ? -1 : 1;
if (__predict_false(se1->s_dst_addr != se2->s_src_addr))
return (se1->s_dst_addr < se2->s_src_addr) ? -1 : 1;
/* Direction "backwards". */
se2_id1 = se2->s_dst.id; se2_addr1 = &se2->s_dst_addr;
se2_id2 = se2->s_src.id; se2_addr2 = &se2->s_src_addr;
}
return 0;
if (se1->s_src.id != se2_id1)
return (se1->s_src.id < se2_id1) ? -1 : 1;
if (se1->s_dst.id != se2_id2)
return (se1->s_dst.id < se2_id2) ? -1 : 1;
if (se1->s_addr_sz != se2->s_addr_sz)
return (se1->s_addr_sz < se2->s_addr_sz) ? -1 : 1;
if ((ret = memcmp(&se1->s_src_addr, se2_addr1, se1->s_addr_sz)) != 0)
return ret;
return memcmp(&se1->s_dst_addr, se2_addr2, se1->s_addr_sz);
}
static signed int
@ -269,33 +251,42 @@ sess_rbtree_cmp_key(void *ctx, const void *n1, const void *key)
{
const npf_session_t * const se = n1;
const npf_cache_t * const npc = key;
const npf_addr_t *addr1, *addr2;
in_port_t sport, dport;
in_addr_t src, dst;
uint32_t id1, id2;
int ret;
if (se->s_direction == npc->npc_dir) {
if (npf_cache_ipproto(npc) == IPPROTO_TCP) {
const struct tcphdr *th = &npc->npc_l4.tcp;
sport = th->th_sport;
dport = th->th_dport;
} else {
const struct udphdr *uh = &npc->npc_l4.udp;
sport = uh->uh_sport;
dport = uh->uh_dport;
}
if (se->s_direction == npc->npc_di) {
/* Direction "forwards". */
src = npc->npc_srcip; sport = npc->npc_sport;
dst = npc->npc_dstip; dport = npc->npc_dport;
addr1 = npc->npc_srcip; id1 = sport;
addr2 = npc->npc_dstip; id2 = dport;
} else {
/* Direction "backwards". */
src = npc->npc_dstip; sport = npc->npc_dport;
dst = npc->npc_srcip; dport = npc->npc_sport;
addr1 = npc->npc_dstip; id1 = dport;
addr2 = npc->npc_srcip; id2 = sport;
}
/* Ports are the main criteria and are first. */
if (se->s_src.id != sport)
return (se->s_src.id < sport) ? -1 : 1;
if (se->s_dst.id != dport)
return (se->s_dst.id < dport) ? -1 : 1;
if (se->s_src.id != id1)
return (se->s_src.id < id1) ? -1 : 1;
if (se->s_dst.id != id2)
return (se->s_dst.id < id2) ? -1 : 1;
/* Note that hash should minimise differentiation on these. */
if (__predict_false(se->s_src_addr != src))
return (se->s_src_addr < src) ? -1 : 1;
if (__predict_false(se->s_dst_addr < dst))
return (se->s_dst_addr < dst) ? -1 : 1;
return 0;
if (se->s_addr_sz != npc->npc_ipsz)
return (se->s_addr_sz < npc->npc_ipsz) ? -1 : 1;
if ((ret = memcmp(&se->s_src_addr, addr1, se->s_addr_sz)) != 0)
return ret;
return memcmp(&se->s_dst_addr, addr2, se->s_addr_sz);
}
static const rb_tree_ops_t sess_rbtree_ops = {
@ -308,12 +299,12 @@ static const rb_tree_ops_t sess_rbtree_ops = {
static inline npf_sess_hash_t *
sess_hash_bucket(const npf_cache_t *key)
{
uint32_t hash, mix;
uint32_t hash, mix = npf_cache_ipproto(key);
KASSERT(npf_iscached(key, NPC_IP46 | NPC_ADDRS));
KASSERT(npf_iscached(key, NPC_IP46));
/* Sum addresses for both directions and mix in protocol. */
mix = key->npc_srcip + key->npc_dstip + key->npc_proto;
/* Sum protocol and both addresses (for both directions). */
mix += npf_addr_sum(key->npc_ipsz, key->npc_srcip, key->npc_dstip);
hash = hash32_buf(&mix, sizeof(uint32_t), HASH32_BUF_INIT);
return &sess_hashtbl[hash & SESS_HASH_MASK];
}
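npf_addr_sum() itself is defined elsewhere in NPF; for the bucket selection above the only property that matters is that the mix is direction independent, i.e. hashing src plus dst selects the same bucket as dst plus src. A purely hypothetical version (example_addr_sum) under that assumption:

    static uint32_t
    example_addr_sum(const int sz, const npf_addr_t *a1, const npf_addr_t *a2)
    {
    	const uint32_t *w1 = (const uint32_t *)a1;
    	const uint32_t *w2 = (const uint32_t *)a2;
    	uint32_t mix = 0;
    	int i;

    	/* 4 bytes for IPv4, 16 bytes for IPv6; addition commutes. */
    	KASSERT(sz > 0 && (sz % sizeof(uint32_t)) == 0);
    	for (i = 0; i < sz / (int)sizeof(uint32_t); i++) {
    		mix += w1[i] + w2[i];
    	}
    	return mix;
    }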
@ -427,59 +418,13 @@ npf_session_tracking(bool track)
return 0;
}
/*
* npf_session_pstate: handle session state according to protocol data.
*/
static inline bool
npf_session_pstate(const npf_cache_t *npc, npf_session_t *se, const int dir)
{
const bool backwards = (se->s_direction != dir);
const int proto = npc->npc_proto;
if (proto != IPPROTO_TCP) {
/* Handle UDP or ICMP response for opening session. */
if (se->s_state == SE_OPENING && backwards) {
se->s_state = SE_ESTABLISHED;
}
return true;
}
const int tcpfl = npc->npc_tcp_flags & (TH_SYN|TH_ACK|TH_RST|TH_FIN);
switch (tcpfl) {
case TH_ACK:
/* Common case. */
if (__predict_true(se->s_state == SE_ESTABLISHED)) {
return true;
}
/* ACK seen after SYN-ACK: session fully established. */
if (se->s_state == SE_ACKNOWLEDGE && !backwards) {
se->s_state = SE_ESTABLISHED;
}
break;
case TH_SYN | TH_ACK:
/* SYN-ACK seen, wait for ACK. */
if (se->s_state == SE_OPENING && backwards) {
se->s_state = SE_ACKNOWLEDGE;
}
break;
case TH_RST:
case TH_FIN:
/* XXX/TODO: Handle TCP reset attacks; later. */
se->s_state = SE_CLOSING;
break;
}
return true;
}
/*
* npf_session_inspect: look if there is an established session (connection).
*
* => If found, we will hold a reference for caller.
*/
npf_session_t *
npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
struct ifnet *ifp, const int di)
npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf, const int di)
{
npf_sess_hash_t *sh;
npf_session_t *se;
@ -488,8 +433,8 @@ npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
if (!sess_tracking || !npf_cache_all(npc, nbuf)) {
return NULL;
}
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
KASSERT(npf_iscached(npc, NPC_PORTS) || npf_iscached(npc, NPC_ICMP));
KASSERT(npf_iscached(npc, NPC_IP46));
KASSERT(npf_iscached(npc, NPC_LAYER4));
/*
* Execute ALG session helpers.
@ -503,7 +448,7 @@ npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
/* Default: original packet, pass its cache. */
key = npc;
}
key->npc_dir = di;
key->npc_di = di;
/*
* Get a hash bucket from the cached key data.
@ -523,7 +468,17 @@ npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
}
/* Inspect the protocol data and handle state changes. */
if (npf_session_pstate(npc, se, di)) {
const bool forw = (se->s_direction == di);
npf_state_t *nst;
if (se->s_nat) {
npf_session_t *lse = se->s_linked;
nst = &lse->s_state;
} else {
nst = &se->s_state;
}
if (npf_state_inspect(npc, nbuf, nst, forw)) {
/* Must update the last activity time. */
getnanouptime(&se->s_atime);
/* Hold a reference. */
@ -542,14 +497,20 @@ npf_session_inspect(npf_cache_t *npc, nbuf_t *nbuf,
* => Sessions is created with the held reference (for caller).
*/
npf_session_t *
npf_session_establish(const npf_cache_t *npc, npf_nat_t *nt, const int di)
npf_session_establish(const npf_cache_t *npc, nbuf_t *nbuf,
npf_nat_t *nt, const int di)
{
const struct tcphdr *th;
const struct udphdr *uh;
npf_sess_hash_t *sh;
npf_session_t *se;
int proto, sz;
bool ok;
if (!sess_tracking) /* XXX */
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
if (!sess_tracking) { /* XXX */
return NULL;
}
/* Allocate and initialise new state. */
se = pool_cache_get(sess_cache, PR_NOWAIT);
@ -563,53 +524,68 @@ npf_session_establish(const npf_cache_t *npc, npf_nat_t *nt, const int di)
/* NAT and backwards session. */
se->s_nat = nt;
se->s_nat_se = NULL;
se->s_linked = NULL;
/* Unique IDs: IP addresses. */
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_ADDRS));
se->s_src_addr = npc->npc_srcip;
se->s_dst_addr = npc->npc_dstip;
KASSERT(npf_iscached(npc, NPC_IP46));
sz = npc->npc_ipsz;
memcpy(&se->s_src_addr, npc->npc_srcip, sz);
memcpy(&se->s_dst_addr, npc->npc_dstip, sz);
se->s_addr_sz = sz;
/* Procotol. */
se->s_type = npc->npc_proto;
proto = npf_cache_ipproto(npc);
se->s_type = proto;
switch (npc->npc_proto) {
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
KASSERT(npf_iscached(npc, NPC_PORTS));
KASSERT(npf_iscached(npc, NPC_TCP));
th = &npc->npc_l4.tcp;
/* Additional IDs: ports. */
se->s_src.id = npc->npc_sport;
se->s_dst.id = npc->npc_dport;
se->s_src.id = th->th_sport;
se->s_dst.id = th->th_dport;
break;
case IPPROTO_UDP:
KASSERT(npf_iscached(npc, NPC_UDP));
/* Additional IDs: ports. */
uh = &npc->npc_l4.udp;
se->s_src.id = uh->uh_sport;
se->s_dst.id = uh->uh_dport;
break;
case IPPROTO_ICMP:
if (npf_iscached(npc, NPC_ICMP_ID)) {
/* ICMP query ID. (XXX) */
se->s_src.id = npc->npc_icmp_id;
se->s_dst.id = npc->npc_icmp_id;
const struct icmp *ic = &npc->npc_l4.icmp;
se->s_src.id = ic->icmp_id;
se->s_dst.id = ic->icmp_id;
break;
}
/* FALLTHROUGH */
default:
/* Unsupported. */
pool_cache_put(sess_cache, se);
return NULL;
ok = false;
goto out;
}
/* Initialize protocol state, but not for NAT sessions. */
if (nt == NULL && !npf_state_init(npc, nbuf, &se->s_state)) {
ok = false;
goto out;
}
/* Set last activity time for a new session. */
se->s_state = SE_OPENING;
getnanouptime(&se->s_atime);
/* Find the hash bucket and insert the state into the tree. */
sh = sess_hash_bucket(npc);
rw_enter(&sh->sh_lock, RW_WRITER);
ok = rb_tree_insert_node(&sh->sh_tree, se) == se;
ok = (rb_tree_insert_node(&sh->sh_tree, se) == se);
if (__predict_true(ok)) {
sh->sh_count++;
DPRINTF(("NPF: new se %p (link %p, nat %p)\n",
se, se->s_nat_se, se->s_nat));
SEPRINTF(("NPF: new se %p (link %p, nat %p)\n",
se, se->s_linked, se->s_nat));
}
rw_exit(&sh->sh_lock);
out:
if (__predict_false(!ok)) {
/* Race with duplicate packet. */
pool_cache_put(sess_cache, se);
@ -630,19 +606,15 @@ npf_session_pass(const npf_session_t *se)
}
/*
* npf_session_setpass: mark session as a "pass" one, also mark the
* linked session if there is one.
* npf_session_setpass: mark session as a "pass" one.
*/
void
npf_session_setpass(npf_session_t *se)
{
KASSERT(se->s_refcnt > 0);
KASSERT(se->s_linked == NULL);
se->s_flags |= SE_PASSSING; /* XXXSMP */
if (se->s_nat_se) {
se = se->s_nat_se;
se->s_flags |= SE_PASSSING; /* XXXSMP */
}
}
/*
@ -665,13 +637,17 @@ void
npf_session_link(npf_session_t *se, npf_session_t *natse)
{
/* Hold a reference on the session we link. Inherit the flags. */
/* Hold a reference on the "NAT" session. Inherit the flags. */
KASSERT(se->s_nat == NULL && natse->s_nat != NULL);
KASSERT(se->s_refcnt > 0 && natse->s_refcnt > 0);
atomic_inc_uint(&natse->s_refcnt);
natse->s_flags = se->s_flags;
KASSERT(se->s_nat_se == NULL);
se->s_nat_se = natse;
/* Link both sessions (point to each other). */
KASSERT(se->s_linked == NULL && natse->s_linked == NULL);
se->s_linked = natse;
natse->s_linked = se;
SEPRINTF(("NPF: linked se %p -> %p\n", se, se->s_linked));
}
/*
@ -683,9 +659,12 @@ npf_session_retnat(npf_session_t *se, const int di, bool *forw)
{
KASSERT(se->s_refcnt > 0);
if (se->s_linked == NULL) {
return NULL;
}
*forw = (se->s_direction == di);
if (se->s_nat_se) {
se = se->s_nat_se;
if (se->s_nat == NULL) {
se = se->s_linked;
KASSERT(se->s_refcnt > 0);
}
return se->s_nat;
@ -697,21 +676,9 @@ npf_session_retnat(npf_session_t *se, const int di, bool *forw)
static inline bool
npf_session_expired(const npf_session_t *se, const struct timespec *tsnow)
{
const int etime = npf_state_etime(&se->s_state, se->s_type);
struct timespec tsdiff;
int etime = 0;
switch (se->s_state) {
case SE_ESTABLISHED:
etime = sess_expire_table[se->s_type];
break;
case SE_OPENING:
case SE_ACKNOWLEDGE:
case SE_CLOSING:
etime = 10; /* XXX: figure out reasonable time */
break;
default:
KASSERT(false);
}
timespecsub(tsnow, &se->s_atime, &tsdiff);
return (tsdiff.tv_sec > etime);
}
@ -752,13 +719,19 @@ npf_session_gc(struct npf_sesslist *gc_list, bool flushall)
LIST_INSERT_HEAD(gc_list, se, se_entry.gclist);
sh->sh_count--;
/* If linked, drop the reference. */
DPRINTF(("NPF: se %p expired\n", se));
if (se->s_nat_se) {
npf_session_release(se->s_nat_se);
DPRINTF(("NPF: se %p unlinked %p\n",
se, se->s_nat_se));
se->s_nat_se = NULL;
/*
* If there is a link and it is a "pass" session,
* then drop the reference and unlink.
*/
SEPRINTF(("NPF: se %p expired\n", se));
if (se->s_linked && se->s_nat == NULL) {
npf_session_t *natse = se->s_linked;
SEPRINTF(("NPF: se %p unlinked %p\n",
se, se->s_linked));
natse->s_linked = NULL;
npf_session_release(natse);
se->s_linked = NULL;
}
se = nse;
}
@ -785,8 +758,11 @@ npf_session_free(struct npf_sesslist *gc_list)
if (se->s_nat) {
/* Release any NAT related structures. */
npf_nat_expire(se->s_nat);
} else {
/* Destroy the state. */
npf_state_destroy(&se->s_state);
}
DPRINTF(("NPF: se %p destroyed\n", se));
SEPRINTF(("NPF: se %p destroyed\n", se));
pool_cache_put(sess_cache, se);
}
se = nse;
@ -853,7 +829,7 @@ npf_sessions_dump(void)
sh = &sess_hashtbl[i];
if (sh->sh_count == 0) {
KASSERT(rb_tree_iterate(&sh->sh_tree,
NULL, RB_DIR_RIGHT) == NULL);
NULL, RB_DIR_LEFT) == NULL);
continue;
}
printf("s_bucket %d (count = %d)\n", i, sh->sh_count);
@ -863,20 +839,20 @@ npf_sessions_dump(void)
int etime;
timespecsub(&tsnow, &se->s_atime, &tsdiff);
etime = (se->s_state == SE_ESTABLISHED) ?
sess_expire_table[se->s_type] : 10;
etime = npf_state_etime(&se->s_state, se->s_type);
printf("\t%p: type(%d) di %d, pass %d, tsdiff %d, "
"etime %d\n", se, se->s_type, se->s_direction,
se->s_flags, (int)tsdiff.tv_sec, etime);
ip.s_addr = se->s_src_addr;
memcpy(&ip, &se->s_src_addr, sizeof(ip));
printf("\tsrc (%s, %d) ",
inet_ntoa(ip), ntohs(se->s_src.port));
ip.s_addr = se->s_dst_addr;
printf("dst (%s, %d)\n",
memcpy(&ip, &se->s_dst_addr, sizeof(ip));
printf("dst (%s, %d)\n",
inet_ntoa(ip), ntohs(se->s_dst.port));
if (se->s_nat_se != NULL) {
printf("\tlinked with %p\n", se->s_nat_se);
npf_state_dump(&se->s_state);
if (se->s_linked != NULL) {
printf("\tlinked with %p\n", se->s_linked);
}
if (se->s_nat != NULL) {
npf_nat_dump(se->s_nat);

sys/net/npf/npf_state.c (new file, 316 lines)
View File

@ -0,0 +1,316 @@
/* $NetBSD: npf_state.c,v 1.1 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2010 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This material is based upon work partially supported by The
* NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* NPF state engine to track connections.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_state.c,v 1.1 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mutex.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include "npf_impl.h"
#define MAXACKWINDOW 66000
/* Session expiration table. XXX revisit later */
static const u_int expire_table[ ] = {
[IPPROTO_TCP] = 86400, /* 24 hours */
[IPPROTO_UDP] = 120, /* 2 min */
[IPPROTO_ICMP] = 30 /* 30 sec */
};
static bool
npf_tcp_inwindow(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
const bool forw)
{
const struct tcphdr *th = &npc->npc_l4.tcp;
const int tcpfl = th->th_flags;
npf_tcpstate_t *fstate, *tstate;
int tcpdlen, wscale, ackskew;
tcp_seq seq, ack, end;
uint32_t win;
KASSERT(npf_iscached(npc, NPC_TCP));
tcpdlen = npf_tcpsaw(__UNCONST(npc), &seq, &ack, &win);
end = seq + tcpdlen;
if (tcpfl & TH_SYN) {
end++;
}
if (tcpfl & TH_FIN) {
end++;
}
/*
* Perform SEQ/ACK numbers check against boundaries. Reference:
*
* Rooij G., "Real stateful TCP packet filtering in IP Filter",
* 10th USENIX Security Symposium invited talk, Aug. 2001.
*/
fstate = &nst->nst_tcpst[forw ? 0 : 1];
tstate = &nst->nst_tcpst[forw ? 1 : 0];
win = win ? (win << fstate->nst_wscale) : 1;
if (tcpfl == TH_SYN) {
/*
* First SYN or re-transmission of SYN. Initialize all
* values. State of other side will get set with a SYN-ACK
* reply (see below).
*/
fstate->nst_seqend = end;
fstate->nst_ackend = end;
fstate->nst_maxwin = win;
tstate->nst_seqend = 0;
tstate->nst_ackend = 0;
tstate->nst_maxwin = 0;
/*
* Handle TCP Window Scaling (RFC 1323). Both sides may
* send this option in their SYN packets.
*/
if (npf_fetch_tcpopts(npc, nbuf, NULL, &wscale)) {
fstate->nst_wscale = wscale;
} else {
fstate->nst_wscale = 0;
}
tstate->nst_wscale = 0;
/* Done. */
return true;
}
if (fstate->nst_seqend == 0) {
/*
* Should be a SYN-ACK reply to SYN. If SYN is not set,
* then we are in the middle of a connection and have lost tracking.
*/
fstate->nst_seqend = end;
fstate->nst_ackend = end + 1;
fstate->nst_maxwin = 1;
/* Handle TCP Window Scaling (must be ignored if no SYN). */
if (tcpfl & TH_SYN) {
fstate->nst_wscale =
npf_fetch_tcpopts(npc, nbuf, NULL, &wscale) ?
wscale : 0;
}
}
if ((tcpfl & TH_ACK) == 0) {
/* Pretend that an ACK was sent. */
ack = tstate->nst_seqend;
} else if ((tcpfl & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST) && ack == 0) {
/* Workaround for some TCP stacks. */
ack = tstate->nst_seqend;
}
if (seq == end) {
/* If packet contains no data - assume it is valid. */
end = fstate->nst_seqend;
seq = end;
}
/*
* Determine whether the data is within previously noted window,
* that is, upper boundary for valid data (I).
*/
if (!SEQ_GEQ(fstate->nst_ackend, end)) {
return false;
}
/* Lower boundary (II), which is no more than one window back. */
if (!SEQ_GEQ(seq, fstate->nst_seqend - tstate->nst_maxwin)) {
return false;
}
/*
* Boundaries for valid acknowledgments (III, IV) - on predicted
* window up or down, since packets may be fragmented.
*/
ackskew = tstate->nst_seqend - ack;
if (ackskew < -MAXACKWINDOW || ackskew > MAXACKWINDOW) {
return false;
}
/*
* Negative ackskew might be due to fragmented packets. Since the
* total length of the packet is unknown - bump the boundary.
*/
if (ackskew < 0) {
tstate->nst_seqend = end;
}
/* Keep track of the maximum window seen. */
if (fstate->nst_maxwin < win) {
fstate->nst_maxwin = win;
}
if (SEQ_GT(end, fstate->nst_seqend)) {
fstate->nst_seqend = end;
}
/* Note the window for upper boundary. */
if (SEQ_GEQ(ack + win, tstate->nst_ackend)) {
tstate->nst_ackend = ack + win;
}
return true;
}
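As a worked example of the four checks, with made-up numbers: suppose the forward side's tracked send edge is fstate->nst_seqend = 1000, the peer has acknowledged up to 1000 with a scaled window of 8192 (so fstate->nst_ackend = 9192), and the peer's own edge is tstate->nst_seqend = 5000 with tstate->nst_maxwin = 4096. A forward segment with seq = 1000, 512 bytes of data and ack = 5000 then satisfies all four tests:

    I.   end = 1000 + 512 = 1512, and SEQ_GEQ(9192, 1512) holds;
    II.  SEQ_GEQ(1000, 1000 - 4096) holds (lower edge, one window back);
    III. ackskew = 5000 - 5000 = 0, which is >= -MAXACKWINDOW;
    IV.  ackskew = 0 <= MAXACKWINDOW.

A forged segment claiming seq = 20000 would fail check I and npf_tcp_inwindow() would reject it.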
static inline bool
npf_state_tcp(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst,
const bool forw)
{
const struct tcphdr *th = &npc->npc_l4.tcp;
const int tcpfl = th->th_flags;
/*
* Handle 3-way handshake (SYN -> SYN,ACK -> ACK).
*/
switch (nst->nst_state) {
case ST_ESTABLISHED:
/* Common case - connection established. */
if (tcpfl & TH_ACK) {
/*
* Data transmission.
*/
} else if (tcpfl & TH_FIN) {
/* XXX TODO */
}
break;
case ST_OPENING:
/* SYN has been sent, expecting SYN-ACK. */
if (tcpfl == (TH_SYN | TH_ACK) && !forw) {
/* Received backwards SYN-ACK. */
nst->nst_state = ST_ACKNOWLEDGE;
} else if (tcpfl == TH_SYN && forw) {
/* Re-transmission of SYN. */
} else {
return false;
}
break;
case ST_ACKNOWLEDGE:
/* SYN-ACK was seen, expecting ACK. */
if (tcpfl == TH_ACK && forw) {
nst->nst_state = ST_ESTABLISHED;
} else {
return false;
}
break;
case ST_CLOSING:
/* XXX TODO */
break;
default:
npf_state_dump(nst);
KASSERT(false);
}
return npf_tcp_inwindow(npc, nbuf, nst, forw);
}
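To make the handshake tracking concrete, a hypothetical packet trace for a tracked connection moves through the states as follows (the initial SYN is handled by npf_state_init() below):

    SYN          forwards     -> ST_OPENING       (set at state init)
    SYN|ACK      backwards    ST_OPENING       -> ST_ACKNOWLEDGE
    ACK          forwards     ST_ACKNOWLEDGE   -> ST_ESTABLISHED
    ACK + data   either way   ST_ESTABLISHED   -> ST_ESTABLISHED

Anything out of sequence, e.g. a forwards SYN|ACK while still in ST_OPENING, makes npf_state_tcp() return false, so the packet does not match the tracked state.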
bool
npf_state_init(const npf_cache_t *npc, nbuf_t *nbuf, npf_state_t *nst)
{
const int proto = npf_cache_ipproto(npc);
KASSERT(npf_iscached(npc, NPC_IP46 | NPC_LAYER4));
if (proto == IPPROTO_TCP) {
const struct tcphdr *th = &npc->npc_l4.tcp;
/* TCP case: must be SYN. */
KASSERT(npf_iscached(npc, NPC_TCP));
if (th->th_flags != TH_SYN) {
return false;
}
/* Initial values for TCP window and sequence tracking. */
if (!npf_tcp_inwindow(npc, nbuf, nst, true)) {
return false;
}
}
mutex_init(&nst->nst_lock, MUTEX_DEFAULT, IPL_SOFTNET);
nst->nst_state = ST_OPENING;
return true;
}
void
npf_state_destroy(npf_state_t *nst)
{
KASSERT(nst->nst_state != 0);
mutex_destroy(&nst->nst_lock);
}
bool
npf_state_inspect(const npf_cache_t *npc, nbuf_t *nbuf,
npf_state_t *nst, const bool forw)
{
const int proto = npf_cache_ipproto(npc);
bool ret;
mutex_enter(&nst->nst_lock);
switch (proto) {
case IPPROTO_TCP:
/* Handle TCP. */
ret = npf_state_tcp(npc, nbuf, nst, forw);
break;
default:
/* Handle UDP or ICMP response for opening session. */
if (nst->nst_state == ST_OPENING && !forw) {
nst->nst_state = ST_ESTABLISHED;
}
ret = true;
}
mutex_exit(&nst->nst_lock);
return ret;
}
int
npf_state_etime(const npf_state_t *nst, const int proto)
{
if (nst->nst_state == ST_ESTABLISHED) {
return expire_table[proto];
}
return 10; /* XXX TODO */
}
#if defined(DDB) || defined(_NPF_TESTING)
void
npf_state_dump(npf_state_t *nst)
{
npf_tcpstate_t *fst = &nst->nst_tcpst[0], *tst = &nst->nst_tcpst[1];
printf("\tstate (%p) %d:\n\t\t"
"F { seqend %u ackend %u mwin %u wscale %u }\n\t\t"
"T { seqend %u, ackend %u mwin %u wscale %u }\n",
nst, nst->nst_state,
fst->nst_seqend, fst->nst_ackend, fst->nst_maxwin, fst->nst_wscale,
tst->nst_seqend, tst->nst_ackend, tst->nst_maxwin, tst->nst_wscale
);
}
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $ */
/* $NetBSD: npf_tableset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -41,10 +41,8 @@
* - Dynamic array resize.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.2 2010/09/24 22:51:50 rmind Exp $");
#endif
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/param.h>
#include <sys/kernel.h>

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_data.c,v 1.3 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_data.c,v 1.4 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -32,6 +32,9 @@
* XXX: Needs some clean-up.
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_data.c,v 1.4 2010/11/11 06:30:39 rmind Exp $");
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
@ -389,7 +392,8 @@ npfctl_add_rule(prop_dictionary_t rl, prop_dictionary_t parent)
}
void
npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface)
npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface,
bool ipid_rnd, int minttl, int maxmss)
{
prop_number_t attrnum;
@ -405,6 +409,14 @@ npfctl_rule_setattr(prop_dictionary_t rl, int attr, char *iface)
ifnum = prop_number_create_integer(if_idx);
prop_dictionary_set(rl, "interface", ifnum);
}
if (attr & NPF_RULE_NORMALIZE) {
prop_dictionary_set(rl, "randomize-id",
prop_bool_create(ipid_rnd));
prop_dictionary_set(rl, "min-ttl",
prop_number_create_integer(minttl));
prop_dictionary_set(rl, "max-mss",
prop_number_create_integer(maxmss));
}
}
/*
@ -649,6 +661,7 @@ npfctl_nat_setup(prop_dictionary_t rl, int type, int flags,
{
int attr = NPF_RULE_PASS | NPF_RULE_FINAL;
in_addr_t addr, mask;
void *addrptr;
/* Translation type and flags. */
prop_dictionary_set(rl, "type",
@ -658,12 +671,15 @@ npfctl_nat_setup(prop_dictionary_t rl, int type, int flags,
/* Interface and attributes. */
attr |= (type == NPF_NATOUT) ? NPF_RULE_OUT : NPF_RULE_IN;
npfctl_rule_setattr(rl, attr, iface);
npfctl_rule_setattr(rl, attr, iface, false, 0, 0);
/* Translation IP, XXX should be no mask. */
npfctl_parse_cidr(taddr, &addr, &mask);
prop_dictionary_set(rl, "translation_ip",
prop_number_create_integer(addr));
addrptr = prop_data_create_data(&addr, sizeof(in_addr_t));
if (addrptr == NULL) {
err(EXIT_FAILURE, "prop_data_create_data");
}
prop_dictionary_set(rl, "translation-ip", addrptr);
/* Translation port (for redirect case). */
if (rport) {
@ -676,7 +692,7 @@ npfctl_nat_setup(prop_dictionary_t rl, int type, int flags,
if (range) {
errx(EXIT_FAILURE, "range is not supported for 'rdr'");
}
prop_dictionary_set(rl, "translation_port",
prop_dictionary_set(rl, "translation-port",
prop_number_create_integer(port));
}
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_ncgen.c,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_ncgen.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -36,6 +36,9 @@
* calculations, when changing generation routines.
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_ncgen.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <sys/types.h>
#include "npfctl.h"
@ -77,7 +80,6 @@ npfctl_failure_offset(int nblocks[])
}
#if 0
/*
* npfctl_gennc_ether: initial n-code fragment to check Ethernet frame.
*/
@ -111,7 +113,6 @@ npfctl_gennc_ether(void **ncptr, int foff, uint16_t ethertype)
/* + 13 words. */
*ncptr = (void *)nc;
}
#endif
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: npf_parser.c,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npf_parser.c,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -30,6 +30,9 @@
* XXX: This needs clean-up!
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: npf_parser.c,v 1.3 2010/11/11 06:30:39 rmind Exp $");
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -140,6 +143,34 @@ npfctl_parsevalue(char *buf)
return vr;
}
static inline int
npfctl_parsenorm(char *buf, bool *rnd, int *minttl, int *maxmss)
{
char *p = buf, *sptr;
DPRINTF(("norm\t|%s|\n", p));
p = strtok_r(buf, ", \t", &sptr);
if (p == NULL) {
return -1;
}
do {
if (strcmp(p, "random-id") == 0) {
*rnd = true;
} else if (strcmp(p, "min-ttl") == 0) {
p = strtok_r(NULL, ", \t", &sptr);
*minttl = atoi(p);
} else if (strcmp(p, "max-mss") == 0) {
p = strtok_r(NULL, ", \t", &sptr);
*maxmss = atoi(p);
} else {
return -1;
}
} while ((p = strtok_r(NULL, ", \t", &sptr)) != 0);
return 0;
}
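The option string handed to this routine is the text between the parentheses of a "normalize ( ... )" clause (see npfctl_parserule() below). As a standalone illustration, the same tokenizing loop applied to the string "random-id, min-ttl 64, max-mss 1440" yields rnd = true, minttl = 64 and maxmss = 1440; a hypothetical userland test of that loop, not part of npfctl:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main(void)
    {
    	char buf[] = "random-id, min-ttl 64, max-mss 1440";
    	char *p, *sptr;
    	bool rnd = false;
    	int minttl = 0, maxmss = 0;

    	for (p = strtok_r(buf, ", \t", &sptr); p != NULL;
    	    p = strtok_r(NULL, ", \t", &sptr)) {
    		if (strcmp(p, "random-id") == 0) {
    			rnd = true;
    		} else if (strcmp(p, "min-ttl") == 0) {
    			p = strtok_r(NULL, ", \t", &sptr);
    			minttl = p ? atoi(p) : 0;
    		} else if (strcmp(p, "max-mss") == 0) {
    			p = strtok_r(NULL, ", \t", &sptr);
    			maxmss = p ? atoi(p) : 0;
    		}
    	}
    	/* Prints: rnd=1 minttl=64 maxmss=1440 */
    	printf("rnd=%d minttl=%d maxmss=%d\n", rnd, minttl, maxmss);
    	return 0;
    }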
/*
* npfctl_parserule: main routine to parse a rule. Syntax:
*
@ -154,10 +185,9 @@ npfctl_parserule(char *buf, prop_dictionary_t rl)
{
var_t *from_cidr = NULL, *fports = NULL;
var_t *to_cidr = NULL, *tports = NULL;
char *proto = NULL, *tcp_flags = NULL;
char *p, *sptr, *iface;
bool icmp = false, tcp = false;
int icmp_type = -1, icmp_code = -1;
char *p, *sptr, *iface, *proto = NULL, *tcp_flags = NULL;
int icmp_type = -1, icmp_code = -1, minttl = 0, maxmss = 0;
bool icmp = false, tcp = false, rnd = false;
int ret, attr = 0;
DPRINTF(("rule\t|%s|\n", buf));
@ -337,10 +367,32 @@ last:
if (p && strcmp(p, "keep") == 0) {
attr |= NPF_RULE_KEEPSTATE;
PARSE_NEXT_TOKEN();
if (p == NULL || strcmp(p, "state") != 0) {
return PARSE_ERR();
}
PARSE_NEXT_TOKEN_NOCHECK();
}
/* normalize ( .. ) */
if (p && strcmp(p, "normalize") == 0) {
p = strtok_r(NULL, "()", &sptr);
if (p == NULL) {
return PARSE_ERR();
}
if (npfctl_parsenorm(p, &rnd, &minttl, &maxmss)) {
return PARSE_ERR();
}
attr |= NPF_RULE_NORMALIZE;
PARSE_NEXT_TOKEN_NOCHECK();
}
/* Should have nothing more. */
if (p != NULL) {
return PARSE_ERR();
}
/* Set the rule attributes and interface, if any. */
npfctl_rule_setattr(rl, attr, iface);
npfctl_rule_setattr(rl, attr, iface, rnd, minttl, maxmss);
/*
* Generate all protocol data.
@ -386,7 +438,8 @@ npfctl_parsegroup(char *buf, prop_dictionary_t rl)
if (strcmp(p, "default") == 0) {
attr_dir = NPF_RULE_IN | NPF_RULE_OUT;
npfctl_rule_setattr(rl,
GROUP_ATTRS | NPF_RULE_DEFAULT | attr_dir, NULL);
GROUP_ATTRS | NPF_RULE_DEFAULT | attr_dir, NULL,
false, 0, 0);
return 0;
}
@ -433,7 +486,7 @@ npfctl_parsegroup(char *buf, prop_dictionary_t rl)
else
return -1;
}
npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface);
npfctl_rule_setattr(rl, GROUP_ATTRS | attr_dir, iface, false, 0, 0);
return 0;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: npfctl.c,v 1.1 2010/08/22 18:56:24 rmind Exp $ */
/* $NetBSD: npfctl.c,v 1.2 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -29,6 +29,9 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: npfctl.c,v 1.2 2010/11/11 06:30:39 rmind Exp $");
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
@ -145,12 +148,6 @@ npfctl(int action, int argc, char **argv)
npf_ioctl_table_t tbl;
char *arg;
#ifdef DEBUG
npfctl_init_data();
npfctl_parsecfg("npf.conf");
ret = npfctl_ioctl_send(fd);
return;
#endif
fd = open(NPF_DEV_PATH, O_RDONLY);
if (fd == -1) {
err(EXIT_FAILURE, "cannot open " NPF_DEV_PATH);
@ -218,6 +215,12 @@ main(int argc, char **argv)
}
cmd = argv[1];
#ifdef DEBUG
npfctl_init_data();
npfctl_parsecfg("npf.conf");
return npfctl_ioctl_send(0);
#endif
/* Find and call the subroutine */
for (n = 0; operations[n].cmd != NULL; n++) {
if (strcmp(cmd, operations[n].cmd) != 0)

View File

@ -1,4 +1,4 @@
/* $NetBSD: npfctl.h,v 1.2 2010/09/16 04:53:27 rmind Exp $ */
/* $NetBSD: npfctl.h,v 1.3 2010/11/11 06:30:39 rmind Exp $ */
/*-
* Copyright (c) 2009-2010 The NetBSD Foundation, Inc.
@ -77,7 +77,8 @@ bool npfctl_parse_v4mask(char *, in_addr_t *, in_addr_t *);
prop_dictionary_t npfctl_mk_rule(bool);
void npfctl_add_rule(prop_dictionary_t, prop_dictionary_t);
void npfctl_rule_setattr(prop_dictionary_t, int, char *);
void npfctl_rule_setattr(prop_dictionary_t, int, char *,
bool, int, int);
void npfctl_rule_protodata(prop_dictionary_t, char *, char *,
int, int, var_t *, var_t *, var_t *, var_t *);
void npfctl_rule_icmpdata(prop_dictionary_t, var_t *, var_t *);