/* $NetBSD: ip_nat.c,v 1.3 1997/01/29 02:16:23 thorpej Exp $ */ /* * (C)opyright 1995 by Darren Reed. * * Redistribution and use in source and binary forms are permitted * provided that this notice is preserved and due credit is given * to the original author and the contributors. * * Added redirect stuff and a LOT of bug fixes. (mcn@EnGarde.com) * * Things still screwed: * 1) You can't specify a mapping to a class D address. By default, it * always adds 1 to that address. As a result, when a packet comes back, * the rule won't be matched. (e.g. outgoing address = 199.165.219.2, * whereas the rule says outgoing address = 199.165.219.1/32. Because * ADNATS always adds one, and there really isn't any provision for * only using 1 address (the in_space stuff is broke), there isn't any * easy solution) * 2) There needs to be a way to flush the NATs table completely. Either * an ioctl, or an easy way of doing it from ipnat.c. * * Missing from RFC 1631: ICMP header checksum recalculations. * */ #if !defined(lint) && defined(LIBC_SCCS) static char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; static char rcsid[] = "Id: ip_nat.c,v 1.2.4.11 1996/12/02 11:52:02 darrenr Exp"; #endif #if !defined(_KERNEL) && !defined(KERNEL) # include # include # include #endif #include #include #include #include #include #include #include #include #ifdef _KERNEL # include #endif #if !defined(__SVR4) && !defined(__svr4__) # include #else # include # include # include # include #endif #include #ifdef sun #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) #endif nat_t *nat_table[2][NAT_SIZE]; ipnat_t *nat_list = NULL; u_long nat_inuse = 0, fr_defnatage = 1200; natstat_t nat_stats; #if SOLARIS # ifndef _KERNEL #define bcmp(a,b,c) memcpy(a,b,c) #define bcopy(a,b,c) memmove(b,a,c) # else extern kmutex_t ipf_nat; # endif #endif static int flush_nattable __P((void)); static int clear_natlist __P((void)); nat_t *nat_new __P((ipnat_t *np, ip_t *ip, fr_info_t *fin, u_short flags, int direction)); /* * How the NAT is organised and works. * * Inside (interface y) NAT Outside (interface x) * -------------------- -+- ------------------------------------- * Packet going | out, processsed by ip_natout() for x * ------------> | ------------> * src=10.1.1.1 | src=192.1.1.1 * | * | in, processed by ip_natin() for x * <------------ | <------------ * dst=10.1.1.1 | dst=192.1.1.1 * -------------------- -+- ------------------------------------- * ip_natout() - changes ip_src and if required, sport * - creates a new mapping, if required. * ip_natin() - changes ip_dst and if required, dport * * In the NAT table, internal source is recorded as "in" and externally * seen as "out". */ /* * Handle ioctls which manipulate the NAT. */ int nat_ioctl(data, cmd, mode) caddr_t data; u_long cmd; int mode; { register ipnat_t *nat, *n = NULL, **np = NULL; ipnat_t natd; int error = 0, ret; /* * For add/delete, look to see if the NAT entry is already present */ MUTEX_ENTER(&ipf_nat); if ((cmd == SIOCADNAT) || (cmd == SIOCRMNAT)) { IRCOPY(data, (char *)&natd, sizeof(natd)); nat = &natd; for (np = &nat_list; (n = *np); np = &n->in_next) if (!bcmp((char *)&nat->in_flags, (char *)&n->in_flags, IPN_CMPSIZ)) break; } switch (cmd) { case SIOCADNAT : if (!(mode & FWRITE)) { error = EPERM; break; } if (n) { error = EEXIST; break; } if (!(n = (ipnat_t *)KMALLOC(sizeof(*n)))) { error = ENOMEM; break; } IRCOPY((char *)data, (char *)n, sizeof(*n)); n->in_ifp = (void *)GETUNIT(n->in_ifname); n->in_next = *np; n->in_space = ~(0xffffffff & ntohl(n->in_outmsk)); n->in_space -= 2; /* lose 2: broadcast + network address */ if (n->in_outmsk != 0xffffffff) n->in_nip = ntohl(n->in_outip) + 1; else n->in_nip = ntohl(n->in_outip); if (n->in_redir == NAT_MAP) n->in_pnext = ntohs(n->in_pmin); /* Otherwise, these fields are preset */ *np = n; break; case SIOCRMNAT : if (!(mode & FWRITE)) { error = EPERM; break; } if (!n) { error = ESRCH; break; } *np = n->in_next; KFREE(n); break; case SIOCGNATS : nat_stats.ns_table = (nat_t ***)nat_table; nat_stats.ns_list = nat_list; nat_stats.ns_inuse = nat_inuse; IWCOPY((char *)&nat_stats, (char *)data, sizeof(nat_stats)); break; case SIOCGNATL : { natlookup_t nl; nat_t *na; IRCOPY((char *)data, (char *)&nl, sizeof(nl)); if ((na = nat_lookupredir(&nl))) { nl.nl_inip = na->nat_outip; nl.nl_inport = na->nat_outport; IWCOPY((char *)&nl, (char *)data, sizeof(nl)); } else error = ESRCH; break; } case SIOCFLNAT : if (!(mode & FWRITE)) { error = EPERM; break; } ret = flush_nattable(); IWCOPY((caddr_t)&ret, data, sizeof(ret)); break; case SIOCCNATL : if (!(mode & FWRITE)) { error = EPERM; break; } ret = clear_natlist(); IWCOPY((caddr_t)&ret, data, sizeof(ret)); break; } MUTEX_EXIT(&ipf_nat); return error; } /* * flush_nattable - clear the NAT table of all mapping entries. */ static int flush_nattable() { nat_t *nat, **natp; int i, j = 0; for (natp = &nat_table[0][0], i = NAT_SIZE - 1; i >= 0; i--, natp++) while ((nat = *natp)) { *natp = nat->nat_next; KFREE((caddr_t)nat); j++; } for (natp = &nat_table[1][0], i = NAT_SIZE - 1; i >= 0; i--, natp++) while ((nat = *natp)) { *natp = nat->nat_next; KFREE((caddr_t)nat); j++; } return j; } /* * clear_natlist - delete all entries in the active NAT mapping list. */ static int clear_natlist() { register ipnat_t *n, **np; int i = 0; for (np = &nat_list; (n = *np); i++) { *np = n->in_next; KFREE(n); } return i; } /* * Create a new NAT table entry. */ nat_t *nat_new(np, ip, fin, flags, direction) ipnat_t *np; ip_t *ip; fr_info_t *fin; u_short flags; int direction; { register u_long sum1, sum2, sumd; u_short port = 0, sport = 0, dport = 0, nport = 0; struct in_addr in; tcphdr_t *tcp = NULL; nat_t *nat, **natp; if (flags) { tcp = (tcphdr_t *)fin->fin_dp; sport = tcp->th_sport; dport = tcp->th_dport; } /* Give me a new nat */ if (!(nat = (nat_t *)KMALLOC(sizeof(*nat)))) return NULL; /* * Search the current table for a match. */ if (direction == NAT_OUTBOUND) { /* * If it's an outbound packet which doesn't match any existing * record, then create a new port */ do { in.s_addr = np->in_nip; if (np->in_flags & IPN_TCPUDP) { port = htons(np->in_pnext++); if (np->in_pnext >= ntohs(np->in_pmax)) { np->in_pnext = ntohs(np->in_pmin); np->in_space--; if (np->in_outmsk != 0xffffffff) np->in_nip++; } } else { np->in_space--; if (np->in_outmsk != 0xffffffff) np->in_nip++; } if ((np->in_nip & ntohl(np->in_outmsk)) > ntohl(np->in_outip)) np->in_nip = ntohl(np->in_outip) + 1; } while (nat_lookupinip(in, sport)); /* Setup the NAT table */ nat->nat_use = 0; nat->nat_inip = ip->ip_src; nat->nat_outip.s_addr = htonl(in.s_addr); sum1 = (ntohl(ip->ip_src.s_addr) & 0xffff) + (ntohl(ip->ip_src.s_addr) >> 16) + ntohs(sport); /* Do it twice */ sum1 = (sum1 & 0xffff) + (sum1 >> 16); sum1 = (sum1 & 0xffff) + (sum1 >> 16); sum2 = (in.s_addr & 0xffff) + (in.s_addr >> 16) + ntohs(port); /* Do it twice */ sum2 = (sum2 & 0xffff) + (sum2 >> 16); sum2 = (sum2 & 0xffff) + (sum2 >> 16); if (sum1 > sum2) sum2--; /* Because ~1 == -2, We really need ~1 == -1 */ sumd = sum2 - sum1; sumd = (sumd & 0xffff) + (sumd >> 16); nat->nat_sumd = (sumd & 0xffff) + (sumd >> 16); if (sport) { nat->nat_inport = sport; nat->nat_outport = port; } else { nat->nat_inport = 0; nat->nat_outport = 0; } } else { /* * Otherwise, it's an inbound packet. Most likely, we don't * want to rewrite source ports and source addresses. Instead, * we want to rewrite to a fixed internal address and fixed * internal port. */ in.s_addr = ntohl(np->in_inip); nport = np->in_pnext; nat->nat_use = 0; nat->nat_inip.s_addr = htonl(in.s_addr); nat->nat_outip = ip->ip_dst; nat->nat_oip = ip->ip_src; sum1 = (ntohl(ip->ip_dst.s_addr) & 0xffff) + (ntohl(ip->ip_dst.s_addr) >> 16) + ntohs(dport); /* Do it twice */ sum1 = (sum1 & 0xffff) + (sum1 >> 16); sum1 = (sum1 & 0xffff) + (sum1 >> 16); sum2 = (in.s_addr & 0xffff) + (in.s_addr >> 16) + ntohs(nport); /* Do it twice */ sum2 = (sum2 & 0xffff) + (sum2 >> 16); sum2 = (sum2 & 0xffff) + (sum2 >> 16); if (sum2 > sum1) sum1--; /* Because ~1 == -2, We really need ~1 == -1 */ sumd = (sum1 - sum2); sumd = (sumd & 0xffff) + (sumd >> 16); nat->nat_sumd = (sumd & 0xffff) + (sumd >> 16); if (dport) { nat->nat_inport = nport; nat->nat_outport = dport; nat->nat_oport = sport; } else { nat->nat_inport = 0; nat->nat_outport = 0; } } in.s_addr = htonl(in.s_addr); natp = &nat_table[0][nat->nat_inip.s_addr % NAT_SIZE]; nat->nat_next = *natp; *natp = nat; nat->nat_use++; natp = &nat_table[1][nat->nat_outip.s_addr % NAT_SIZE]; nat->nat_next = *natp; *natp = nat; nat->nat_use++; if (direction == NAT_REDIRECT) { ip->ip_src = in; if (flags) tcp->th_sport = htons(port); } else { ip->ip_dst = in; if (flags) tcp->th_dport = htons(nport); } nat_stats.ns_added++; nat_inuse++; return nat; } /* * NB: these lookups don't lock access to the list, it assume it has already * been done! */ nat_t *nat_lookupredir(np) natlookup_t *np; { nat_t *nat; nat = nat_table[0][np->nl_inip.s_addr % NAT_SIZE]; for (; nat; nat = nat->nat_next) if ((nat->nat_inip.s_addr == np->nl_inip.s_addr) && (nat->nat_oip.s_addr == np->nl_outip.s_addr) && (np->nl_inport == nat->nat_inport) && (np->nl_outport == nat->nat_oport)) return nat; return NULL; } nat_t *nat_lookupinip(ipaddr, sport) struct in_addr ipaddr; u_short sport; { nat_t *nat; nat = nat_table[0][ipaddr.s_addr % NAT_SIZE]; for (; nat; nat = nat->nat_next) if (nat->nat_inip.s_addr == ipaddr.s_addr) { if (nat->nat_inport && (sport != nat->nat_inport)) continue; return nat; } return NULL; } nat_t *nat_lookupoutip(np, ip, tcp) register ipnat_t *np; ip_t *ip; tcphdr_t *tcp; { struct in_addr ipaddr; u_short port = 0; nat_t *nat; if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_UDP) port = tcp->th_dport; ipaddr.s_addr = ip->ip_dst.s_addr; nat = nat_table[1][ipaddr.s_addr % NAT_SIZE]; if (np->in_redir == NAT_MAP) { for (; nat; nat = nat->nat_next) if (nat->nat_outip.s_addr == ipaddr.s_addr && (!nat->nat_outport || (port == nat->nat_outport))) return nat; } else for (; nat; nat = nat->nat_next) if (nat->nat_outip.s_addr == ipaddr.s_addr && nat->nat_oip.s_addr == ip->ip_src.s_addr && port == nat->nat_outport && tcp->th_sport == nat->nat_oport) return nat; return NULL; } /* * Packets going out on the external interface go through this. * Here, the source address requires alteration, if anything. */ int ip_natout(ip, hlen, fin) ip_t *ip; int hlen; fr_info_t *fin; { register ipnat_t *np; register u_long ipa; register u_long sum1; tcphdr_t *tcp = NULL; nat_t *nat; u_short nflags = 0, sport = 0; struct ifnet *ifp = fin->fin_ifp; if (!(ip->ip_off & 0x1fff) && !(fin->fin_fi.fi_fl & FI_SHORT)) { if (ip->ip_p == IPPROTO_TCP) nflags = IPN_TCP; else if (ip->ip_p == IPPROTO_UDP) nflags = IPN_UDP; } if (nflags) { tcp = (tcphdr_t *)fin->fin_dp; sport = tcp->th_sport; } ipa = ip->ip_src.s_addr; MUTEX_ENTER(&ipf_nat); for (np = nat_list; np; np = np->in_next) if ((np->in_ifp == ifp) && np->in_space && (!np->in_flags || (np->in_flags & nflags)) && ((ipa & np->in_inmsk) == np->in_inip) && (np->in_redir == NAT_MAP || np->in_pnext == sport)) { /* * If there is no current entry in the nat table for * this IP#, create one for it. */ if (!(nat = nat_lookupinip(ip->ip_src, sport))) { if (np->in_redir == NAT_REDIRECT) continue; /* * if it's a redirection, then we don't want * to create new outgoing port stuff. * Redirections are only for incoming * connections. */ if (!(nat = nat_new(np, ip, fin, nflags & np->in_flags, NAT_OUTBOUND))) break; } else ip->ip_src = nat->nat_outip; nat->nat_age = fr_defnatage; /* 5 mins */ /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. */ if (nflags && !(ip->ip_off & 0x1fff) && !(fin->fin_fi.fi_fl & FI_SHORT)) { u_short *sp; u_short sumshort; if (nat->nat_outport) tcp->th_sport = nat->nat_outport; if (ip->ip_p == IPPROTO_TCP) { sp = &tcp->th_sum; sum1 = (~ntohs(*sp)) & 0xffff; sum1 += nat->nat_sumd; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *sp = htons(sumshort); set_tcp_age(&nat->nat_age, nat->nat_state, ip, fin, tcp->th_dport == nat->nat_inport); } else if (ip->ip_p == IPPROTO_UDP) { udphdr_t *udp = (udphdr_t *)tcp; sp = &udp->uh_sum; if (udp->uh_sum) { sum1 = (~ntohs(*sp)) & 0xffff; sum1 += nat->nat_sumd; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *sp = htons(sumshort); } } } nat_stats.ns_mapped[1]++; MUTEX_EXIT(&ipf_nat); return 1; } MUTEX_EXIT(&ipf_nat); return 0; } /* * Packets coming in from the external interface go through this. * Here, the destination address requires alteration, if anything. */ int ip_natin(ip, hlen, fin) ip_t *ip; int hlen; fr_info_t *fin; { register ipnat_t *np; register struct in_addr in; register u_long sum1; struct ifnet *ifp = fin->fin_ifp; tcphdr_t *tcp = NULL; u_short port = 0, nflags = 0; nat_t *nat; if (!(ip->ip_off & 0x1fff) && !(fin->fin_fi.fi_fl & FI_SHORT)) { if (ip->ip_p == IPPROTO_TCP) nflags = IPN_TCP; else if (ip->ip_p == IPPROTO_UDP) nflags = IPN_UDP; } if (nflags) { tcp = (tcphdr_t *)((char *)ip + hlen); port = tcp->th_dport; } in = ip->ip_dst; MUTEX_ENTER(&ipf_nat); for (np = nat_list; np; np = np->in_next) if ((np->in_ifp == ifp) && (!np->in_flags || (nflags & np->in_flags)) && ((in.s_addr & np->in_outmsk) == np->in_outip) && (np->in_redir == NAT_MAP || np->in_pmin == port)) { if (!(nat = nat_lookupoutip(np, ip, tcp))) { if (np->in_redir == NAT_MAP) continue; else { /* * If this rule (np) is a redirection, * rather than a mapping, then do a * nat_new. Otherwise, if it's just a * mapping, do a continue; */ nflags &= np->in_flags; if (!(nat = nat_new(np, ip, fin, nflags, NAT_INBOUND))) break; } } else ip->ip_dst = nat->nat_inip; nat->nat_age = fr_defnatage; /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. */ if (nflags && !(ip->ip_off & 0x1fff) && !(fin->fin_fi.fi_fl & FI_SHORT)) { u_short *sp; u_short sumshort; if (nat->nat_inport) tcp->th_dport = nat->nat_inport; if (ip->ip_p == IPPROTO_TCP) { sp = &tcp->th_sum; sum1 = (~ntohs(*sp)) & 0xffff; sum1 += ~nat->nat_sumd & 0xffff; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *sp = htons(sumshort); set_tcp_age(&nat->nat_age, nat->nat_state, ip, fin, tcp->th_dport == nat->nat_inport); } else if (ip->ip_p == IPPROTO_UDP) { udphdr_t *udp = (udphdr_t *)tcp; sp = &udp->uh_sum; if (udp->uh_sum) { sum1 = (~ntohs(*sp)) & 0xffff; sum1+= ~nat->nat_sumd & 0xffff; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *sp = htons(sumshort); } } } nat_stats.ns_mapped[0]++; MUTEX_EXIT(&ipf_nat); return 1; } MUTEX_EXIT(&ipf_nat); return 0; } /* * Free all memory used by NAT structures allocated at runtime. */ void ip_natunload() { register struct nat *nat, **natp; register struct ipnat *ipn, **ipnp; register int i; MUTEX_ENTER(&ipf_nat); for (i = 0; i < NAT_SIZE; i++) for (natp = &nat_table[0][i]; (nat = *natp); ) { *natp = nat->nat_next; if (!--nat->nat_use) KFREE(nat); } for (i = 0; i < NAT_SIZE; i++) for (natp = &nat_table[1][i]; (nat = *natp); ) { *natp = nat->nat_next; if (!--nat->nat_use) KFREE(nat); } for (ipnp = &nat_list; (ipn = *ipnp); ) { *ipnp = ipn->in_next; KFREE(ipn); } MUTEX_EXIT(&ipf_nat); } /* * Slowly expire held state for NAT entries. Timeouts are set in * expectation of this being called twice per second. */ void ip_natexpire() { register struct nat *nat, **natp; register int i; MUTEX_ENTER(&ipf_nat); for (i = 0; i < NAT_SIZE; i++) for (natp = &nat_table[0][i]; (nat = *natp); ) { if (nat->nat_age > 0) nat->nat_age--; if (!nat->nat_use || !nat->nat_age) { *natp = nat->nat_next; if (nat->nat_use) nat->nat_use--; if (!nat->nat_use) { KFREE(nat); nat_stats.ns_expire++; nat_inuse--; } } else natp = &nat->nat_next; } for (i = 0; i < NAT_SIZE; i++) for (natp = &nat_table[1][i]; (nat = *natp); ) { if (nat->nat_age > 0) nat->nat_age--; if (!nat->nat_use || !nat->nat_age) { *natp = nat->nat_next; if (nat->nat_use) nat->nat_use--; if (!nat->nat_use) { KFREE(nat); nat_stats.ns_expire++; nat_inuse--; } } else natp = &nat->nat_next; } MUTEX_EXIT(&ipf_nat); }