NetBSD/sys/netinet/in_proto.c

531 lines
14 KiB
C
Raw Normal View History

2009-02-28 21:31:12 +03:00
/* $NetBSD: in_proto.c,v 1.97 2009/02/28 18:31:12 pooka Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
2002-06-09 20:33:36 +04:00
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
2002-06-09 20:33:36 +04:00
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
1993-03-21 12:45:37 +03:00
/*
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
1993-03-21 12:45:37 +03:00
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
1993-03-21 12:45:37 +03:00
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_proto.c 8.2 (Berkeley) 2/9/95
1993-03-21 12:45:37 +03:00
*/
2001-11-13 03:32:34 +03:00
#include <sys/cdefs.h>
2009-02-28 21:31:12 +03:00
__KERNEL_RCSID(0, "$NetBSD: in_proto.c,v 1.97 2009/02/28 18:31:12 pooka Exp $");
2001-11-13 03:32:34 +03:00
1998-01-12 06:02:48 +03:00
#include "opt_mrouting.h"
1998-07-05 08:37:35 +04:00
#include "opt_eon.h" /* ISO CLNL over IP */
#include "opt_iso.h" /* ISO TP tunneled over IP */
#include "opt_inet.h"
#include "opt_ipsec.h"
#include "opt_pim.h"
2009-02-28 21:31:12 +03:00
#include "opt_gateway.h"
1998-01-12 06:02:48 +03:00
1993-12-18 03:40:47 +03:00
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
1993-03-21 12:45:37 +03:00
#include <net/if.h>
#include <net/radix.h>
#include <net/route.h>
1993-12-18 03:40:47 +03:00
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/in_ifattach.h>
#include <netinet/in_pcb.h>
#include <netinet/in_proto.h>
#ifdef INET6
#ifndef INET
#include <netinet/in.h>
#endif
#include <netinet/ip6.h>
#endif
#include <netinet/igmp_var.h>
#ifdef PIM
#include <netinet/pim_var.h>
#endif
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_debug.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_encap.h>
1993-03-21 12:45:37 +03:00
/*
* TCP/IP protocol family: IP, ICMP, UDP, TCP.
*/
#ifdef IPSEC
#include <netinet6/ipsec.h>
#include <netinet6/ah.h>
#ifdef IPSEC_ESP
#include <netinet6/esp.h>
#endif
#include <netinet6/ipcomp.h>
#endif /* IPSEC */
#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
#endif /* FAST_IPSEC */
1993-03-21 12:45:37 +03:00
#ifdef TPIP
1996-02-14 02:40:59 +03:00
#include <netiso/tp_param.h>
#include <netiso/tp_var.h>
#endif /* TPIP */
1993-03-21 12:45:37 +03:00
#ifdef EON
1996-02-14 02:40:59 +03:00
#include <netiso/eonvar.h>
#endif /* EON */
1993-03-21 12:45:37 +03:00
#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif
#include "etherip.h"
#if NETHERIP > 0
#include <netinet/ip_etherip.h>
#endif
DOMAIN_DEFINE(inetdomain); /* forward declare and add to link set */
/* Wrappers to acquire kernel_lock. */
PR_WRAP_USRREQ(rip_usrreq)
PR_WRAP_USRREQ(udp_usrreq)
PR_WRAP_USRREQ(tcp_usrreq)
#define rip_usrreq rip_usrreq_wrapper
#define udp_usrreq udp_usrreq_wrapper
#define tcp_usrreq tcp_usrreq_wrapper
PR_WRAP_CTLINPUT(rip_ctlinput)
PR_WRAP_CTLINPUT(udp_ctlinput)
PR_WRAP_CTLINPUT(tcp_ctlinput)
#define rip_ctlinput rip_ctlinput_wrapper
#define udp_ctlinput udp_ctlinput_wrapper
#define tcp_ctlinput tcp_ctlinput_wrapper
PR_WRAP_CTLOUTPUT(rip_ctloutput)
PR_WRAP_CTLOUTPUT(udp_ctloutput)
PR_WRAP_CTLOUTPUT(tcp_ctloutput)
#define rip_ctloutput rip_ctloutput_wrapper
#define udp_ctloutput udp_ctloutput_wrapper
#define tcp_ctloutput tcp_ctloutput_wrapper
#if defined(IPSEC) || defined(FAST_IPSEC)
PR_WRAP_CTLINPUT(ah4_ctlinput)
#define ah4_ctlinput ah4_ctlinput_wrapper
#endif
#if defined(IPSEC_ESP) || defined(FAST_IPSEC)
PR_WRAP_CTLINPUT(esp4_ctlinput)
#define esp4_ctlinput esp4_ctlinput_wrapper
#endif
#ifdef TPIP
PR_WRAP_CTLOUTPUT(tp_ctloutput)
#define tp_ctloutput tp_ctloutput_wrapper
PR_WRAP_CTLINPUT(tpip_ctlinput)
#define tpip_ctlinput tpip_ctlinput_wrapper
#endif
#ifdef EON
PR_WRAP_CTLINPUT(eonctlinput)
#define eonctlinput eonctlinput_wrapper
#endif
const struct protosw inetsw[] = {
{ .pr_domain = &inetdomain,
.pr_init = ip_init,
.pr_output = ip_output,
.pr_slowtimo = ip_slowtimo,
.pr_drain = ip_drain,
1993-03-21 12:45:37 +03:00
},
{ .pr_type = SOCK_DGRAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_UDP,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_PURGEIF,
.pr_input = udp_input,
.pr_ctlinput = udp_ctlinput,
.pr_ctloutput = udp_ctloutput,
.pr_usrreq = udp_usrreq,
.pr_init = udp_init,
1993-03-21 12:45:37 +03:00
},
{ .pr_type = SOCK_STREAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_TCP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS|PR_PURGEIF,
.pr_input = tcp_input,
.pr_ctlinput = tcp_ctlinput,
.pr_ctloutput = tcp_ctloutput,
.pr_usrreq = tcp_usrreq,
.pr_init = tcp_init,
.pr_slowtimo = tcp_slowtimo,
.pr_drain = tcp_drain,
1993-03-21 12:45:37 +03:00
},
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_RAW,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_PURGEIF,
.pr_input = rip_input,
.pr_output = rip_output,
.pr_ctlinput = rip_ctlinput,
.pr_ctloutput = rip_ctloutput,
.pr_usrreq = rip_usrreq,
1993-03-21 12:45:37 +03:00
},
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_ICMP,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = icmp_input,
.pr_output = rip_output,
.pr_ctlinput = rip_ctlinput,
.pr_ctloutput = rip_ctloutput,
.pr_usrreq = rip_usrreq,
.pr_init = icmp_init,
},
#ifdef GATEWAY
{ .pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IP,
.pr_slowtimo = ipflow_slowtimo,
.pr_init = ipflow_poolinit,
},
#endif /* GATEWAY */
#ifdef IPSEC
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ah4_input,
.pr_ctlinput = ah4_ctlinput,
.pr_init = ah4_init,
},
#ifdef IPSEC_ESP
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_ESP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = esp4_input,
.pr_ctlinput = esp4_ctlinput,
.pr_init = esp4_init,
},
#endif /* IPSEC_ESP */
{ .pr_type = SOCK_RAW,
2007-03-05 02:53:36 +03:00
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IPCOMP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipcomp4_input,
.pr_init = ipcomp4_init,
},
#endif /* IPSEC */
#ifdef FAST_IPSEC
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipsec4_common_input,
.pr_ctlinput = ah4_ctlinput,
},
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_ESP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipsec4_common_input,
.pr_ctlinput = esp4_ctlinput,
},
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IPCOMP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipsec4_common_input,
},
#endif /* FAST_IPSEC */
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IPV4,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap4_input,
.pr_output = rip_output,
.pr_ctlinput = rip_ctlinput,
.pr_ctloutput = rip_ctloutput,
.pr_usrreq = rip_usrreq,
.pr_init = encap_init,
},
#ifdef INET6
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IPV6,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap4_input,
.pr_output = rip_output,
.pr_ctlinput = rip_ctlinput,
.pr_ctloutput = rip_ctloutput,
.pr_usrreq = rip_usrreq,
.pr_init = encap_init,
},
#endif /* INET6 */
#if NETHERIP > 0
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_ETHERIP,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = ip_etherip_input,
.pr_output = rip_output,
.pr_ctlinput = rip_ctlinput,
.pr_ctloutput = rip_ctloutput,
.pr_usrreq = rip_usrreq,
},
#endif /* NETHERIP > 0 */
#if NCARP > 0
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_CARP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = carp_proto_input,
.pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_usrreq = rip_usrreq,
},
#endif /* NCARP > 0 */
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_IGMP,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = igmp_input,
.pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_ctlinput = rip_ctlinput,
.pr_usrreq = rip_usrreq,
.pr_fasttimo = igmp_fasttimo,
.pr_slowtimo = igmp_slowtimo,
2008-04-15 20:02:03 +04:00
.pr_init = igmp_init,
1993-03-21 12:45:37 +03:00
},
#ifdef PIM
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = pim_input,
.pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_ctlinput = rip_ctlinput,
.pr_usrreq = rip_usrreq,
},
#endif /* PIM */
1993-03-21 12:45:37 +03:00
#ifdef TPIP
{ .pr_type = SOCK_SEQPACKET,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_TP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_LASTHDR|PR_ABRTACPTDIS,
.pr_input = tpip_input,
.pr_ctloutput = tp_ctloutput,
.pr_ctlinput = tpip_ctlinput,
.pr_usrreq = tp_usrreq,
.pr_init = tp_init,
.pr_slowtimo = tp_slowtimo,
.pr_drain = tp_drain,
1993-03-21 12:45:37 +03:00
},
#endif /* TPIP */
#ifdef ISO
1993-03-21 12:45:37 +03:00
/* EON (ISO CLNL over IP) */
#ifdef EON
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_EON,
.pr_flags = PR_LASTHDR,
.pr_input = eoninput,
.pr_ctlinput = eonctlinput,
.pr_init = eonprotoinit,
1993-03-21 12:45:37 +03:00
},
#else
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_EON,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap4_input,
.pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_ctlinput = rip_ctlinput,
.pr_usrreq = rip_usrreq,
.pr_init = encap_init,
},
#endif /* EON */
#endif /* ISO */
/* raw wildcard */
{ .pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = rip_input,
.pr_output = rip_output,
.pr_ctloutput = rip_ctloutput,
.pr_ctlinput = rip_ctlinput,
.pr_usrreq = rip_usrreq,
.pr_init = rip_init,
1993-03-21 12:45:37 +03:00
},
};
extern struct ifqueue ipintrq;
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that tells a socket that it should both add a protocol header to tx'd datagrams and remove the header from rx'd datagrams: int onoff = 1, s = socket(...); setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff); 2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4 sockets. 3) Reorganize the protocols' pr_ctloutput implementations a bit. Consistently return ENOPROTOOPT when an option is unsupported, and EINVAL if a supported option's arguments are incorrect. Reorganize the flow of code so that it's more clear how/when options are passed down the stack until they are handled. Shorten some pr_ctloutput staircases for readability. 4) Extract common mbuf code into subroutines, add new sockaddr methods, and introduce a new subroutine, fsocreate(), for reuse later; use it first in sys_socket(): struct mbuf *m_getsombuf(struct socket *so) Create an mbuf and make its owner the socket `so'. struct mbuf *m_intopt(struct socket *so, int val) Create an mbuf, make its owner the socket `so', put the int `val' into it, and set its length to sizeof(int). int fsocreate(..., int *fd) Create a socket, a la socreate(9), put the socket into the given LWP's descriptor table, return the descriptor at `fd' on success. void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp) const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp) Extract a pointer to the address part of a sockaddr. Write the length of the address part at `slenp', if `slenp' is not NULL. socklen_t sockaddr_getlen(const struct sockaddr *sa) Return the length of a sockaddr. This just evaluates to sa->sa_len. I only add this for consistency with code that appears in a portable userland library that I am going to import. const struct sockaddr *sockaddr_any(const struct sockaddr *sa) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses. const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses.
2007-09-19 08:33:42 +04:00
const struct sockaddr_in in_any = {
.sin_len = sizeof(struct sockaddr_in)
, .sin_family = AF_INET
, .sin_port = 0
, .sin_addr = {.s_addr = 0 /* INADDR_ANY */}
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that tells a socket that it should both add a protocol header to tx'd datagrams and remove the header from rx'd datagrams: int onoff = 1, s = socket(...); setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff); 2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4 sockets. 3) Reorganize the protocols' pr_ctloutput implementations a bit. Consistently return ENOPROTOOPT when an option is unsupported, and EINVAL if a supported option's arguments are incorrect. Reorganize the flow of code so that it's more clear how/when options are passed down the stack until they are handled. Shorten some pr_ctloutput staircases for readability. 4) Extract common mbuf code into subroutines, add new sockaddr methods, and introduce a new subroutine, fsocreate(), for reuse later; use it first in sys_socket(): struct mbuf *m_getsombuf(struct socket *so) Create an mbuf and make its owner the socket `so'. struct mbuf *m_intopt(struct socket *so, int val) Create an mbuf, make its owner the socket `so', put the int `val' into it, and set its length to sizeof(int). int fsocreate(..., int *fd) Create a socket, a la socreate(9), put the socket into the given LWP's descriptor table, return the descriptor at `fd' on success. void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp) const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp) Extract a pointer to the address part of a sockaddr. Write the length of the address part at `slenp', if `slenp' is not NULL. socklen_t sockaddr_getlen(const struct sockaddr *sa) Return the length of a sockaddr. This just evaluates to sa->sa_len. I only add this for consistency with code that appears in a portable userland library that I am going to import. const struct sockaddr *sockaddr_any(const struct sockaddr *sa) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses. const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses.
2007-09-19 08:33:42 +04:00
};
struct domain inetdomain = {
Here are various changes designed to protect against bad IPv4 routing caused by stale route caches (struct route). Route caches are sprinkled throughout PCBs, the IP fast-forwarding table, and IP tunnel interfaces (gre, gif, stf). Stale IPv6 and ISO route caches will be treated by separate patches. Thank you to Christoph Badura for suggesting the general approach to invalidating route caches that I take here. Here are the details: Add hooks to struct domain for tracking and for invalidating each domain's route caches: dom_rtcache, dom_rtflush, and dom_rtflushall. Introduce helper subroutines, rtflush(ro) for invalidating a route cache, rtflushall(family) for invalidating all route caches in a routing domain, and rtcache(ro) for notifying the domain of a new cached route. Chain together all IPv4 route caches where ro_rt != NULL. Provide in_rtcache() for adding a route to the chain. Provide in_rtflush() and in_rtflushall() for invalidating IPv4 route caches. In in_rtflush(), set ro_rt to NULL, and remove the route from the chain. In in_rtflushall(), walk the chain and remove every route cache. In rtrequest1(), call rtflushall() to invalidate route caches when a route is added. In gif(4), discard the workaround for stale caches that involves expiring them every so often. Replace the pattern 'RTFREE(ro->ro_rt); ro->ro_rt = NULL;' with a call to rtflush(ro). Update ipflow_fastforward() and all other users of route caches so that they expect a cached route, ro->ro_rt, to turn to NULL. Take care when moving a 'struct route' to rtflush() the source and to rtcache() the destination. In domain initializers, use .dom_xxx tags. KNF here and there.
2006-12-09 08:33:04 +03:00
.dom_family = PF_INET, .dom_name = "internet", .dom_init = NULL,
.dom_externalize = NULL, .dom_dispose = NULL,
.dom_protosw = inetsw,
2007-06-14 01:08:29 +04:00
.dom_protoswNPROTOSW = &inetsw[__arraycount(inetsw)],
Here are various changes designed to protect against bad IPv4 routing caused by stale route caches (struct route). Route caches are sprinkled throughout PCBs, the IP fast-forwarding table, and IP tunnel interfaces (gre, gif, stf). Stale IPv6 and ISO route caches will be treated by separate patches. Thank you to Christoph Badura for suggesting the general approach to invalidating route caches that I take here. Here are the details: Add hooks to struct domain for tracking and for invalidating each domain's route caches: dom_rtcache, dom_rtflush, and dom_rtflushall. Introduce helper subroutines, rtflush(ro) for invalidating a route cache, rtflushall(family) for invalidating all route caches in a routing domain, and rtcache(ro) for notifying the domain of a new cached route. Chain together all IPv4 route caches where ro_rt != NULL. Provide in_rtcache() for adding a route to the chain. Provide in_rtflush() and in_rtflushall() for invalidating IPv4 route caches. In in_rtflush(), set ro_rt to NULL, and remove the route from the chain. In in_rtflushall(), walk the chain and remove every route cache. In rtrequest1(), call rtflushall() to invalidate route caches when a route is added. In gif(4), discard the workaround for stale caches that involves expiring them every so often. Replace the pattern 'RTFREE(ro->ro_rt); ro->ro_rt = NULL;' with a call to rtflush(ro). Update ipflow_fastforward() and all other users of route caches so that they expect a cached route, ro->ro_rt, to turn to NULL. Take care when moving a 'struct route' to rtflush() the source and to rtcache() the destination. In domain initializers, use .dom_xxx tags. KNF here and there.
2006-12-09 08:33:04 +03:00
.dom_rtattach = rn_inithead,
.dom_rtoffset = 32,
.dom_maxrtkey = sizeof(struct ip_pack4),
#ifdef IPSELSRC
Here are various changes designed to protect against bad IPv4 routing caused by stale route caches (struct route). Route caches are sprinkled throughout PCBs, the IP fast-forwarding table, and IP tunnel interfaces (gre, gif, stf). Stale IPv6 and ISO route caches will be treated by separate patches. Thank you to Christoph Badura for suggesting the general approach to invalidating route caches that I take here. Here are the details: Add hooks to struct domain for tracking and for invalidating each domain's route caches: dom_rtcache, dom_rtflush, and dom_rtflushall. Introduce helper subroutines, rtflush(ro) for invalidating a route cache, rtflushall(family) for invalidating all route caches in a routing domain, and rtcache(ro) for notifying the domain of a new cached route. Chain together all IPv4 route caches where ro_rt != NULL. Provide in_rtcache() for adding a route to the chain. Provide in_rtflush() and in_rtflushall() for invalidating IPv4 route caches. In in_rtflush(), set ro_rt to NULL, and remove the route from the chain. In in_rtflushall(), walk the chain and remove every route cache. In rtrequest1(), call rtflushall() to invalidate route caches when a route is added. In gif(4), discard the workaround for stale caches that involves expiring them every so often. Replace the pattern 'RTFREE(ro->ro_rt); ro->ro_rt = NULL;' with a call to rtflush(ro). Update ipflow_fastforward() and all other users of route caches so that they expect a cached route, ro->ro_rt, to turn to NULL. Take care when moving a 'struct route' to rtflush() the source and to rtcache() the destination. In domain initializers, use .dom_xxx tags. KNF here and there.
2006-12-09 08:33:04 +03:00
.dom_ifattach = in_domifattach,
.dom_ifdetach = in_domifdetach,
#else
Here are various changes designed to protect against bad IPv4 routing caused by stale route caches (struct route). Route caches are sprinkled throughout PCBs, the IP fast-forwarding table, and IP tunnel interfaces (gre, gif, stf). Stale IPv6 and ISO route caches will be treated by separate patches. Thank you to Christoph Badura for suggesting the general approach to invalidating route caches that I take here. Here are the details: Add hooks to struct domain for tracking and for invalidating each domain's route caches: dom_rtcache, dom_rtflush, and dom_rtflushall. Introduce helper subroutines, rtflush(ro) for invalidating a route cache, rtflushall(family) for invalidating all route caches in a routing domain, and rtcache(ro) for notifying the domain of a new cached route. Chain together all IPv4 route caches where ro_rt != NULL. Provide in_rtcache() for adding a route to the chain. Provide in_rtflush() and in_rtflushall() for invalidating IPv4 route caches. In in_rtflush(), set ro_rt to NULL, and remove the route from the chain. In in_rtflushall(), walk the chain and remove every route cache. In rtrequest1(), call rtflushall() to invalidate route caches when a route is added. In gif(4), discard the workaround for stale caches that involves expiring them every so often. Replace the pattern 'RTFREE(ro->ro_rt); ro->ro_rt = NULL;' with a call to rtflush(ro). Update ipflow_fastforward() and all other users of route caches so that they expect a cached route, ro->ro_rt, to turn to NULL. Take care when moving a 'struct route' to rtflush() the source and to rtcache() the destination. In domain initializers, use .dom_xxx tags. KNF here and there.
2006-12-09 08:33:04 +03:00
.dom_ifattach = NULL,
.dom_ifdetach = NULL,
#endif
Here are various changes designed to protect against bad IPv4 routing caused by stale route caches (struct route). Route caches are sprinkled throughout PCBs, the IP fast-forwarding table, and IP tunnel interfaces (gre, gif, stf). Stale IPv6 and ISO route caches will be treated by separate patches. Thank you to Christoph Badura for suggesting the general approach to invalidating route caches that I take here. Here are the details: Add hooks to struct domain for tracking and for invalidating each domain's route caches: dom_rtcache, dom_rtflush, and dom_rtflushall. Introduce helper subroutines, rtflush(ro) for invalidating a route cache, rtflushall(family) for invalidating all route caches in a routing domain, and rtcache(ro) for notifying the domain of a new cached route. Chain together all IPv4 route caches where ro_rt != NULL. Provide in_rtcache() for adding a route to the chain. Provide in_rtflush() and in_rtflushall() for invalidating IPv4 route caches. In in_rtflush(), set ro_rt to NULL, and remove the route from the chain. In in_rtflushall(), walk the chain and remove every route cache. In rtrequest1(), call rtflushall() to invalidate route caches when a route is added. In gif(4), discard the workaround for stale caches that involves expiring them every so often. Replace the pattern 'RTFREE(ro->ro_rt); ro->ro_rt = NULL;' with a call to rtflush(ro). Update ipflow_fastforward() and all other users of route caches so that they expect a cached route, ro->ro_rt, to turn to NULL. Take care when moving a 'struct route' to rtflush() the source and to rtcache() the destination. In domain initializers, use .dom_xxx tags. KNF here and there.
2006-12-09 08:33:04 +03:00
.dom_ifqueues = { &ipintrq, NULL },
.dom_link = { NULL },
.dom_mowner = MOWNER_INIT("",""),
.dom_sa_cmpofs = offsetof(struct sockaddr_in, sin_addr),
.dom_sa_cmplen = sizeof(struct in_addr),
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that tells a socket that it should both add a protocol header to tx'd datagrams and remove the header from rx'd datagrams: int onoff = 1, s = socket(...); setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff); 2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4 sockets. 3) Reorganize the protocols' pr_ctloutput implementations a bit. Consistently return ENOPROTOOPT when an option is unsupported, and EINVAL if a supported option's arguments are incorrect. Reorganize the flow of code so that it's more clear how/when options are passed down the stack until they are handled. Shorten some pr_ctloutput staircases for readability. 4) Extract common mbuf code into subroutines, add new sockaddr methods, and introduce a new subroutine, fsocreate(), for reuse later; use it first in sys_socket(): struct mbuf *m_getsombuf(struct socket *so) Create an mbuf and make its owner the socket `so'. struct mbuf *m_intopt(struct socket *so, int val) Create an mbuf, make its owner the socket `so', put the int `val' into it, and set its length to sizeof(int). int fsocreate(..., int *fd) Create a socket, a la socreate(9), put the socket into the given LWP's descriptor table, return the descriptor at `fd' on success. void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp) const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp) Extract a pointer to the address part of a sockaddr. Write the length of the address part at `slenp', if `slenp' is not NULL. socklen_t sockaddr_getlen(const struct sockaddr *sa) Return the length of a sockaddr. This just evaluates to sa->sa_len. I only add this for consistency with code that appears in a portable userland library that I am going to import. const struct sockaddr *sockaddr_any(const struct sockaddr *sa) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses. const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses.
2007-09-19 08:33:42 +04:00
.dom_sa_any = (const struct sockaddr *)&in_any,
.dom_sockaddr_const_addr = sockaddr_in_const_addr,
.dom_sockaddr_addr = sockaddr_in_addr,
Eliminate address family-specific route caches (struct route, struct route_in6, struct route_iso), replacing all caches with a struct route. The principle benefit of this change is that all of the protocol families can benefit from route cache-invalidation, which is necessary for correct routing. Route-cache invalidation fixes an ancient PR, kern/3508, at long last; it fixes various other PRs, also. Discussions with and ideas from Joerg Sonnenberger influenced this work tremendously. Of course, all design oversights and bugs are mine. DETAILS 1 I added to each address family a pool of sockaddrs. I have introduced routines for allocating, copying, and duplicating, and freeing sockaddrs: struct sockaddr *sockaddr_alloc(sa_family_t af, int flags); struct sockaddr *sockaddr_copy(struct sockaddr *dst, const struct sockaddr *src); struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags); void sockaddr_free(struct sockaddr *sa); sockaddr_alloc() returns either a sockaddr from the pool belonging to the specified family, or NULL if the pool is exhausted. The returned sockaddr has the right size for that family; sa_family and sa_len fields are initialized to the family and sockaddr length---e.g., sa_family = AF_INET and sa_len = sizeof(struct sockaddr_in). sockaddr_free() puts the given sockaddr back into its family's pool. sockaddr_dup() and sockaddr_copy() work analogously to strdup() and strcpy(), respectively. sockaddr_copy() KASSERTs that the family of the destination and source sockaddrs are alike. The 'flags' argumet for sockaddr_alloc() and sockaddr_dup() is passed directly to pool_get(9). 2 I added routines for initializing sockaddrs in each address family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(), etc. They are fairly self-explanatory. 3 structs route_in6 and route_iso are no more. All protocol families use struct route. I have changed the route cache, 'struct route', so that it does not contain storage space for a sockaddr. Instead, struct route points to a sockaddr coming from the pool the sockaddr belongs to. I added a new method to struct route, rtcache_setdst(), for setting the cache destination: int rtcache_setdst(struct route *, const struct sockaddr *); rtcache_setdst() returns 0 on success, or ENOMEM if no memory is available to create the sockaddr storage. It is now possible for rtcache_getdst() to return NULL if, say, rtcache_setdst() failed. I check the return value for NULL everywhere in the kernel. 4 Each routing domain (struct domain) has a list of live route caches, dom_rtcache. rtflushall(sa_family_t af) looks up the domain indicated by 'af', walks the domain's list of route caches and invalidates each one.
2007-05-03 00:40:22 +04:00
.dom_rtcache = LIST_HEAD_INITIALIZER(inetdomain.dom_rtcache)
};
1993-03-21 12:45:37 +03:00
u_char ip_protox[IPPROTO_MAX];
int icmperrppslim = 100; /* 100pps */
Eliminate address family-specific route caches (struct route, struct route_in6, struct route_iso), replacing all caches with a struct route. The principle benefit of this change is that all of the protocol families can benefit from route cache-invalidation, which is necessary for correct routing. Route-cache invalidation fixes an ancient PR, kern/3508, at long last; it fixes various other PRs, also. Discussions with and ideas from Joerg Sonnenberger influenced this work tremendously. Of course, all design oversights and bugs are mine. DETAILS 1 I added to each address family a pool of sockaddrs. I have introduced routines for allocating, copying, and duplicating, and freeing sockaddrs: struct sockaddr *sockaddr_alloc(sa_family_t af, int flags); struct sockaddr *sockaddr_copy(struct sockaddr *dst, const struct sockaddr *src); struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags); void sockaddr_free(struct sockaddr *sa); sockaddr_alloc() returns either a sockaddr from the pool belonging to the specified family, or NULL if the pool is exhausted. The returned sockaddr has the right size for that family; sa_family and sa_len fields are initialized to the family and sockaddr length---e.g., sa_family = AF_INET and sa_len = sizeof(struct sockaddr_in). sockaddr_free() puts the given sockaddr back into its family's pool. sockaddr_dup() and sockaddr_copy() work analogously to strdup() and strcpy(), respectively. sockaddr_copy() KASSERTs that the family of the destination and source sockaddrs are alike. The 'flags' argumet for sockaddr_alloc() and sockaddr_dup() is passed directly to pool_get(9). 2 I added routines for initializing sockaddrs in each address family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(), etc. They are fairly self-explanatory. 3 structs route_in6 and route_iso are no more. All protocol families use struct route. I have changed the route cache, 'struct route', so that it does not contain storage space for a sockaddr. Instead, struct route points to a sockaddr coming from the pool the sockaddr belongs to. I added a new method to struct route, rtcache_setdst(), for setting the cache destination: int rtcache_setdst(struct route *, const struct sockaddr *); rtcache_setdst() returns 0 on success, or ENOMEM if no memory is available to create the sockaddr storage. It is now possible for rtcache_getdst() to return NULL if, say, rtcache_setdst() failed. I check the return value for NULL everywhere in the kernel. 4 Each routing domain (struct domain) has a list of live route caches, dom_rtcache. rtflushall(sa_family_t af) looks up the domain indicated by 'af', walks the domain's list of route caches and invalidates each one.
2007-05-03 00:40:22 +04:00
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that tells a socket that it should both add a protocol header to tx'd datagrams and remove the header from rx'd datagrams: int onoff = 1, s = socket(...); setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff); 2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4 sockets. 3) Reorganize the protocols' pr_ctloutput implementations a bit. Consistently return ENOPROTOOPT when an option is unsupported, and EINVAL if a supported option's arguments are incorrect. Reorganize the flow of code so that it's more clear how/when options are passed down the stack until they are handled. Shorten some pr_ctloutput staircases for readability. 4) Extract common mbuf code into subroutines, add new sockaddr methods, and introduce a new subroutine, fsocreate(), for reuse later; use it first in sys_socket(): struct mbuf *m_getsombuf(struct socket *so) Create an mbuf and make its owner the socket `so'. struct mbuf *m_intopt(struct socket *so, int val) Create an mbuf, make its owner the socket `so', put the int `val' into it, and set its length to sizeof(int). int fsocreate(..., int *fd) Create a socket, a la socreate(9), put the socket into the given LWP's descriptor table, return the descriptor at `fd' on success. void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp) const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp) Extract a pointer to the address part of a sockaddr. Write the length of the address part at `slenp', if `slenp' is not NULL. socklen_t sockaddr_getlen(const struct sockaddr *sa) Return the length of a sockaddr. This just evaluates to sa->sa_len. I only add this for consistency with code that appears in a portable userland library that I am going to import. const struct sockaddr *sockaddr_any(const struct sockaddr *sa) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses. const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp) Return the "don't care" sockaddr in the same family as `sa'. This is the address a client should sobind(9) if it does not care the source address and, if applicable, the port et cetera that it uses.
2007-09-19 08:33:42 +04:00
static void
sockaddr_in_addrlen(const struct sockaddr *sa, socklen_t *slenp)
{
socklen_t slen;
if (slenp == NULL)
return;
slen = sockaddr_getlen(sa);
*slenp = (socklen_t)MIN(sizeof(struct in_addr),
slen - MIN(slen, offsetof(struct sockaddr_in, sin_addr)));
}
const void *
sockaddr_in_const_addr(const struct sockaddr *sa, socklen_t *slenp)
{
const struct sockaddr_in *sin;
sockaddr_in_addrlen(sa, slenp);
sin = (const struct sockaddr_in *)sa;
return &sin->sin_addr;
}
void *
sockaddr_in_addr(struct sockaddr *sa, socklen_t *slenp)
{
struct sockaddr_in *sin;
sockaddr_in_addrlen(sa, slenp);
sin = (struct sockaddr_in *)sa;
return &sin->sin_addr;
}
Eliminate address family-specific route caches (struct route, struct route_in6, struct route_iso), replacing all caches with a struct route. The principle benefit of this change is that all of the protocol families can benefit from route cache-invalidation, which is necessary for correct routing. Route-cache invalidation fixes an ancient PR, kern/3508, at long last; it fixes various other PRs, also. Discussions with and ideas from Joerg Sonnenberger influenced this work tremendously. Of course, all design oversights and bugs are mine. DETAILS 1 I added to each address family a pool of sockaddrs. I have introduced routines for allocating, copying, and duplicating, and freeing sockaddrs: struct sockaddr *sockaddr_alloc(sa_family_t af, int flags); struct sockaddr *sockaddr_copy(struct sockaddr *dst, const struct sockaddr *src); struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags); void sockaddr_free(struct sockaddr *sa); sockaddr_alloc() returns either a sockaddr from the pool belonging to the specified family, or NULL if the pool is exhausted. The returned sockaddr has the right size for that family; sa_family and sa_len fields are initialized to the family and sockaddr length---e.g., sa_family = AF_INET and sa_len = sizeof(struct sockaddr_in). sockaddr_free() puts the given sockaddr back into its family's pool. sockaddr_dup() and sockaddr_copy() work analogously to strdup() and strcpy(), respectively. sockaddr_copy() KASSERTs that the family of the destination and source sockaddrs are alike. The 'flags' argumet for sockaddr_alloc() and sockaddr_dup() is passed directly to pool_get(9). 2 I added routines for initializing sockaddrs in each address family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(), etc. They are fairly self-explanatory. 3 structs route_in6 and route_iso are no more. All protocol families use struct route. I have changed the route cache, 'struct route', so that it does not contain storage space for a sockaddr. Instead, struct route points to a sockaddr coming from the pool the sockaddr belongs to. I added a new method to struct route, rtcache_setdst(), for setting the cache destination: int rtcache_setdst(struct route *, const struct sockaddr *); rtcache_setdst() returns 0 on success, or ENOMEM if no memory is available to create the sockaddr storage. It is now possible for rtcache_getdst() to return NULL if, say, rtcache_setdst() failed. I check the return value for NULL everywhere in the kernel. 4 Each routing domain (struct domain) has a list of live route caches, dom_rtcache. rtflushall(sa_family_t af) looks up the domain indicated by 'af', walks the domain's list of route caches and invalidates each one.
2007-05-03 00:40:22 +04:00
int
sockaddr_in_cmp(const struct sockaddr *sa1, const struct sockaddr *sa2)
{
uint_fast8_t len;
const uint_fast8_t addrofs = offsetof(struct sockaddr_in, sin_addr),
addrend = addrofs + sizeof(struct in_addr);
int rc;
const struct sockaddr_in *sin1, *sin2;
sin1 = satocsin(sa1);
sin2 = satocsin(sa2);
len = MIN(addrend, MIN(sin1->sin_len, sin2->sin_len));
if (len > addrofs &&
(rc = memcmp(&sin1->sin_addr, &sin2->sin_addr,
len - addrofs)) != 0)
return rc;
return sin1->sin_len - sin2->sin_len;
}