2013-11-23 18:20:21 +04:00
|
|
|
/* $NetBSD: raw_ip6.c,v 1.112 2013/11/23 14:20:22 christos Exp $ */
|
2001-07-23 23:29:53 +04:00
|
|
|
/* $KAME: raw_ip6.c,v 1.82 2001/07/23 18:57:56 jinmei Exp $ */
|
1999-07-04 01:24:45 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
|
|
|
* All rights reserved.
|
2000-07-07 19:54:16 +04:00
|
|
|
*
|
1999-06-28 10:36:47 +04:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the project nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2000-07-07 19:54:16 +04:00
|
|
|
*
|
1999-06-28 10:36:47 +04:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (c) 1982, 1986, 1988, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 20:26:28 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1999-06-28 10:36:47 +04:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)raw_ip.c 8.2 (Berkeley) 1/4/94
|
|
|
|
*/
|
|
|
|
|
2001-11-13 03:56:55 +03:00
|
|
|
#include <sys/cdefs.h>
|
2013-11-23 18:20:21 +04:00
|
|
|
__KERNEL_RCSID(0, "$NetBSD: raw_ip6.c,v 1.112 2013/11/23 14:20:22 christos Exp $");
|
2001-11-13 03:56:55 +03:00
|
|
|
|
1999-07-10 02:57:15 +04:00
|
|
|
#include "opt_ipsec.h"
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <sys/param.h>
|
2005-03-09 08:07:19 +03:00
|
|
|
#include <sys/sysctl.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/protosw.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/errno.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/proc.h>
|
2006-05-15 01:19:33 +04:00
|
|
|
#include <sys/kauth.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
#include <net/if_types.h>
|
2008-04-23 10:09:04 +04:00
|
|
|
#include <net/net_stats.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_var.h>
|
2000-02-06 15:49:37 +03:00
|
|
|
#include <netinet/ip6.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <netinet6/ip6_var.h>
|
2008-04-15 07:57:04 +04:00
|
|
|
#include <netinet6/ip6_private.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <netinet6/ip6_mroute.h>
|
2000-02-06 15:49:37 +03:00
|
|
|
#include <netinet/icmp6.h>
|
2008-04-15 07:57:04 +04:00
|
|
|
#include <netinet6/icmp6_private.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <netinet6/in6_pcb.h>
|
|
|
|
#include <netinet6/nd6.h>
|
2000-02-26 12:09:17 +03:00
|
|
|
#include <netinet6/ip6protosw.h>
|
2000-07-07 19:54:16 +04:00
|
|
|
#include <netinet6/scope6_var.h>
|
2001-10-18 13:12:13 +04:00
|
|
|
#include <netinet6/raw_ip6.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2013-06-05 23:01:26 +04:00
|
|
|
#ifdef IPSEC
|
2007-02-10 12:43:05 +03:00
|
|
|
#include <netipsec/ipsec.h>
|
2008-04-23 10:09:04 +04:00
|
|
|
#include <netipsec/ipsec_var.h>
|
|
|
|
#include <netipsec/ipsec_private.h>
|
2007-02-10 12:43:05 +03:00
|
|
|
#include <netipsec/ipsec6.h>
|
|
|
|
#endif
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
#include "faith.h"
|
2001-05-08 14:15:13 +04:00
|
|
|
#if defined(NFAITH) && 0 < NFAITH
|
|
|
|
#include <net/if_faith.h>
|
|
|
|
#endif
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-09-04 13:16:57 +04:00
|
|
|
/*
 * PCB table defined in another translation unit.  It is not referenced
 * in the part of the file reviewed here (presumably the IPv4 raw socket
 * table -- confirm against its definition).
 */
extern struct inpcbtable rawcbtable;
|
|
|
|
/*
 * PCB table for this file's raw IPv6 sockets: set up by in6_pcbinit()
 * in rip6_init(), walked by rip6_input() and searched/notified by
 * rip6_ctlinput().
 */
struct inpcbtable raw6cbtable;
|
1999-06-28 10:36:47 +04:00
|
|
|
/* Reinterpret an interface-address pointer as a struct in6_ifaddr pointer. */
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Raw interface to IP6 protocol.
|
|
|
|
*/
|
|
|
|
|
2008-04-15 09:13:37 +04:00
|
|
|
/*
 * Per-CPU storage for the raw IPv6 statistics; rip6_init() allocates it
 * with room for RIP6_NSTATS counters of type uint64_t.
 */
static percpu_t *rip6stat_percpu;
|
|
|
|
|
2008-04-23 10:09:04 +04:00
|
|
|
/* Update raw IPv6 statistic "x" by applying _NET_STATINC() to rip6stat_percpu. */
#define RIP6_STATINC(x) _NET_STATINC(rip6stat_percpu, x)
|
2001-10-18 13:12:13 +04:00
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
/* Forward declaration: rip6_init() calls this with a NULL sysctllog pointer. */
static void sysctl_net_inet6_raw6_setup(struct sysctllog **);
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Initialize raw connection block queue.
|
|
|
|
*/
|
|
|
|
void
|
2009-03-16 00:23:31 +03:00
|
|
|
rip6_init(void)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2002-06-08 02:08:41 +04:00
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
sysctl_net_inet6_raw6_setup(NULL);
|
2003-09-04 13:16:57 +04:00
|
|
|
in6_pcbinit(&raw6cbtable, 1, 1);
|
2008-04-15 09:13:37 +04:00
|
|
|
|
|
|
|
rip6stat_percpu = percpu_alloc(sizeof(uint64_t) * RIP6_NSTATS);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Setup generic address and protocol structures
|
|
|
|
* for raw_input routine, then pass them along with
|
|
|
|
* mbuf chain.
|
|
|
|
*/
|
|
|
|
int
|
2007-05-23 21:14:59 +04:00
|
|
|
rip6_input(struct mbuf **mp, int *offp, int proto)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
|
|
|
struct mbuf *m = *mp;
|
2001-02-08 21:43:17 +03:00
|
|
|
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
2003-09-04 13:16:57 +04:00
|
|
|
struct inpcb_hdr *inph;
|
2001-02-08 21:43:17 +03:00
|
|
|
struct in6pcb *in6p;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct in6pcb *last = NULL;
|
|
|
|
struct sockaddr_in6 rip6src;
|
|
|
|
struct mbuf *opts = NULL;
|
|
|
|
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_IPACKETS);
|
2001-10-18 13:12:13 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
#if defined(NFAITH) && 0 < NFAITH
|
2001-05-08 14:15:13 +04:00
|
|
|
if (faithprefix(&ip6->ip6_dst)) {
|
|
|
|
/* send icmp6 host unreach? */
|
|
|
|
m_freem(m);
|
|
|
|
return IPPROTO_DONE;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
#endif
|
1999-12-22 07:03:01 +03:00
|
|
|
|
|
|
|
/* Be proactive about malicious use of IPv4 mapped address */
|
|
|
|
if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
|
|
|
|
IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
|
|
|
|
/* XXX stat */
|
|
|
|
m_freem(m);
|
|
|
|
return IPPROTO_DONE;
|
|
|
|
}
|
|
|
|
|
2007-11-07 02:40:38 +03:00
|
|
|
sockaddr_in6_init(&rip6src, &ip6->ip6_src, 0, 0, 0);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
if (sa6_recoverscope(&rip6src) != 0) {
|
|
|
|
/* XXX: should be impossible. */
|
|
|
|
m_freem(m);
|
|
|
|
return IPPROTO_DONE;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2013-11-23 18:20:21 +04:00
|
|
|
TAILQ_FOREACH(inph, &raw6cbtable.inpt_queue, inph_queue) {
|
2003-09-04 13:16:57 +04:00
|
|
|
in6p = (struct in6pcb *)inph;
|
|
|
|
if (in6p->in6p_af != AF_INET6)
|
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
if (in6p->in6p_ip6.ip6_nxt &&
|
|
|
|
in6p->in6p_ip6.ip6_nxt != proto)
|
|
|
|
continue;
|
1999-12-13 18:17:17 +03:00
|
|
|
if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
|
2001-10-18 13:12:13 +04:00
|
|
|
!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
|
1999-06-28 10:36:47 +04:00
|
|
|
continue;
|
1999-12-13 18:17:17 +03:00
|
|
|
if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
|
2001-10-18 13:12:13 +04:00
|
|
|
!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
|
1999-06-28 10:36:47 +04:00
|
|
|
continue;
|
2001-10-18 13:12:13 +04:00
|
|
|
if (in6p->in6p_cksum != -1) {
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_ISUM);
|
2004-04-22 21:58:59 +04:00
|
|
|
if (in6_cksum(m, proto, *offp,
|
2001-10-18 13:12:13 +04:00
|
|
|
m->m_pkthdr.len - *offp)) {
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_BADSUM);
|
2001-10-18 13:12:13 +04:00
|
|
|
continue;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
if (last) {
|
|
|
|
struct mbuf *n;
|
2001-02-26 10:20:44 +03:00
|
|
|
|
2013-06-05 23:01:26 +04:00
|
|
|
#ifdef IPSEC
|
2007-02-10 12:43:05 +03:00
|
|
|
/*
|
|
|
|
* Check AH/ESP integrity
|
|
|
|
*/
|
|
|
|
if (!ipsec6_in_reject(m,last))
|
2013-06-05 23:01:26 +04:00
|
|
|
#endif /* IPSEC */
|
1999-06-28 10:36:47 +04:00
|
|
|
if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
|
|
|
|
if (last->in6p_flags & IN6P_CONTROLOPTS)
|
|
|
|
ip6_savecontrol(last, &opts, ip6, n);
|
|
|
|
/* strip intermediate headers */
|
|
|
|
m_adj(n, *offp);
|
|
|
|
if (sbappendaddr(&last->in6p_socket->so_rcv,
|
2002-06-08 02:03:02 +04:00
|
|
|
(struct sockaddr *)&rip6src, n, opts) == 0) {
|
1999-06-28 10:36:47 +04:00
|
|
|
/* should notify about lost packet */
|
|
|
|
m_freem(n);
|
|
|
|
if (opts)
|
|
|
|
m_freem(opts);
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_FULLSOCK);
|
1999-06-28 10:36:47 +04:00
|
|
|
} else
|
|
|
|
sorwakeup(last->in6p_socket);
|
|
|
|
opts = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
last = in6p;
|
|
|
|
}
|
2013-06-05 23:01:26 +04:00
|
|
|
#ifdef IPSEC
|
2007-02-10 12:43:05 +03:00
|
|
|
if (last && ipsec6_in_reject(m, last)) {
|
|
|
|
m_freem(m);
|
|
|
|
/*
|
|
|
|
* XXX ipsec6_in_reject update stat if there is an error
|
|
|
|
* so we just need to update stats by hand in the case of last is
|
|
|
|
* NULL
|
|
|
|
*/
|
|
|
|
if (!last)
|
2008-04-23 10:09:04 +04:00
|
|
|
IPSEC6_STATINC(IPSEC_STAT_IN_POLVIO);
|
2008-04-15 07:57:04 +04:00
|
|
|
IP6_STATDEC(IP6_STAT_DELIVERED);
|
2007-02-10 12:43:05 +03:00
|
|
|
/* do not inject data into pcb */
|
|
|
|
} else
|
2013-06-05 23:01:26 +04:00
|
|
|
#endif /* IPSEC */
|
1999-06-28 10:36:47 +04:00
|
|
|
if (last) {
|
|
|
|
if (last->in6p_flags & IN6P_CONTROLOPTS)
|
|
|
|
ip6_savecontrol(last, &opts, ip6, m);
|
|
|
|
/* strip intermediate headers */
|
|
|
|
m_adj(m, *offp);
|
|
|
|
if (sbappendaddr(&last->in6p_socket->so_rcv,
|
2002-06-08 02:03:02 +04:00
|
|
|
(struct sockaddr *)&rip6src, m, opts) == 0) {
|
1999-06-28 10:36:47 +04:00
|
|
|
m_freem(m);
|
|
|
|
if (opts)
|
|
|
|
m_freem(opts);
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_FULLSOCK);
|
1999-06-28 10:36:47 +04:00
|
|
|
} else
|
|
|
|
sorwakeup(last->in6p_socket);
|
|
|
|
} else {
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_NOSOCK);
|
2001-10-18 13:12:13 +04:00
|
|
|
if (m->m_flags & M_MCAST)
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_NOSOCKMCAST);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (proto == IPPROTO_NONE)
|
|
|
|
m_freem(m);
|
|
|
|
else {
|
2002-09-11 06:41:19 +04:00
|
|
|
u_int8_t *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */
|
2001-02-08 21:43:17 +03:00
|
|
|
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_protounknown);
|
1999-06-28 10:36:47 +04:00
|
|
|
icmp6_error(m, ICMP6_PARAM_PROB,
|
2002-06-08 02:03:02 +04:00
|
|
|
ICMP6_PARAMPROB_NEXTHEADER,
|
2002-09-11 06:41:19 +04:00
|
|
|
prvnxtp - mtod(m, u_int8_t *));
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2008-04-15 07:57:04 +04:00
|
|
|
IP6_STATDEC(IP6_STAT_DELIVERED);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
return IPPROTO_DONE;
|
|
|
|
}
|
|
|
|
|
2008-04-24 15:38:36 +04:00
|
|
|
void *
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
rip6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
|
2000-02-26 12:09:17 +03:00
|
|
|
{
|
2001-02-08 21:43:17 +03:00
|
|
|
struct ip6_hdr *ip6;
|
2001-02-11 09:49:49 +03:00
|
|
|
struct ip6ctlparam *ip6cp = NULL;
|
|
|
|
const struct sockaddr_in6 *sa6_src = NULL;
|
|
|
|
void *cmdarg;
|
2007-11-01 23:33:56 +03:00
|
|
|
void (*notify)(struct in6pcb *, int) = in6_rtchange;
|
2001-02-11 09:49:49 +03:00
|
|
|
int nxt;
|
2000-02-26 12:09:17 +03:00
|
|
|
|
|
|
|
if (sa->sa_family != AF_INET6 ||
|
|
|
|
sa->sa_len != sizeof(struct sockaddr_in6))
|
2009-01-03 06:43:21 +03:00
|
|
|
return NULL;
|
2000-02-26 12:09:17 +03:00
|
|
|
|
2000-02-28 19:10:52 +03:00
|
|
|
if ((unsigned)cmd >= PRC_NCMDS)
|
2009-01-03 06:43:21 +03:00
|
|
|
return NULL;
|
2000-02-28 19:10:52 +03:00
|
|
|
if (PRC_IS_REDIRECT(cmd))
|
|
|
|
notify = in6_rtchange, d = NULL;
|
|
|
|
else if (cmd == PRC_HOSTDEAD)
|
|
|
|
d = NULL;
|
2001-02-11 09:49:49 +03:00
|
|
|
else if (cmd == PRC_MSGSIZE)
|
|
|
|
; /* special code is present, see below */
|
2000-02-28 19:10:52 +03:00
|
|
|
else if (inet6ctlerrmap[cmd] == 0)
|
2009-01-03 06:43:21 +03:00
|
|
|
return NULL;
|
2000-02-26 12:09:17 +03:00
|
|
|
|
|
|
|
/* if the parameter is from icmp6, decode it. */
|
|
|
|
if (d != NULL) {
|
2001-02-11 09:49:49 +03:00
|
|
|
ip6cp = (struct ip6ctlparam *)d;
|
2000-02-26 12:09:17 +03:00
|
|
|
ip6 = ip6cp->ip6c_ip6;
|
2001-02-11 09:49:49 +03:00
|
|
|
cmdarg = ip6cp->ip6c_cmdarg;
|
|
|
|
sa6_src = ip6cp->ip6c_src;
|
|
|
|
nxt = ip6cp->ip6c_nxt;
|
2000-02-26 12:09:17 +03:00
|
|
|
} else {
|
|
|
|
ip6 = NULL;
|
2001-02-11 09:49:49 +03:00
|
|
|
cmdarg = NULL;
|
|
|
|
sa6_src = &sa6_any;
|
|
|
|
nxt = -1;
|
2000-02-26 12:09:17 +03:00
|
|
|
}
|
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
if (ip6 && cmd == PRC_MSGSIZE) {
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
|
2001-02-11 09:49:49 +03:00
|
|
|
int valid = 0;
|
|
|
|
struct in6pcb *in6p;
|
2000-02-26 12:09:17 +03:00
|
|
|
|
|
|
|
/*
|
2001-02-11 09:49:49 +03:00
|
|
|
* Check to see if we have a valid raw IPv6 socket
|
|
|
|
* corresponding to the address in the ICMPv6 message
|
|
|
|
* payload, and the protocol (ip6_nxt) meets the socket.
|
|
|
|
* XXX chase extension headers, or pass final nxt value
|
|
|
|
* from icmp6_notify_error()
|
2000-02-26 12:09:17 +03:00
|
|
|
*/
|
2001-02-11 09:49:49 +03:00
|
|
|
in6p = NULL;
|
2003-09-04 13:16:57 +04:00
|
|
|
in6p = in6_pcblookup_connect(&raw6cbtable, &sa6->sin6_addr, 0,
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetimes Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable come from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
(const struct in6_addr *)&sa6_src->sin6_addr, 0, 0, 0);
|
2001-02-11 09:49:49 +03:00
|
|
|
#if 0
|
|
|
|
if (!in6p) {
|
|
|
|
/*
|
|
|
|
* As the use of sendto(2) is fairly popular,
|
|
|
|
* we may want to allow non-connected pcb too.
|
|
|
|
* But it could be too weak against attacks...
|
|
|
|
* We should at least check if the local
|
|
|
|
* address (= s) is really ours.
|
|
|
|
*/
|
2003-09-04 13:16:57 +04:00
|
|
|
in6p = in6_pcblookup_bind(&raw6cbtable,
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
&sa6->sin6_addr, 0, 0);
|
2001-02-11 09:49:49 +03:00
|
|
|
}
|
|
|
|
#endif
|
2000-02-26 12:09:17 +03:00
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
if (in6p && in6p->in6p_ip6.ip6_nxt &&
|
|
|
|
in6p->in6p_ip6.ip6_nxt == nxt)
|
|
|
|
valid++;
|
2000-02-26 12:09:17 +03:00
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
/*
|
|
|
|
* Depending on the value of "valid" and routing table
|
|
|
|
* size (mtudisc_{hi,lo}wat), we will:
|
2002-06-08 02:05:37 +04:00
|
|
|
* - recalculate the new MTU and create the
|
2001-02-11 09:49:49 +03:00
|
|
|
* corresponding routing entry, or
|
|
|
|
* - ignore the MTU change notification.
|
|
|
|
*/
|
|
|
|
icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* regardless of if we called icmp6_mtudisc_update(),
|
2006-05-05 04:03:21 +04:00
|
|
|
* we need to call in6_pcbnotify(), to notify path MTU
|
|
|
|
* change to the userland (RFC3542), because some
|
|
|
|
* unconnected sockets may share the same destination
|
|
|
|
* and want to know the path MTU.
|
2001-02-11 09:49:49 +03:00
|
|
|
*/
|
2000-02-26 12:09:17 +03:00
|
|
|
}
|
2001-02-11 09:49:49 +03:00
|
|
|
|
2003-09-04 13:16:57 +04:00
|
|
|
(void) in6_pcbnotify(&raw6cbtable, sa, 0,
|
2005-05-30 01:43:51 +04:00
|
|
|
(const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
2000-02-26 12:09:17 +03:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Generate IPv6 header and pass packet to ip6_output.
|
|
|
|
* Tack on options user may have setup with control call.
|
|
|
|
*/
|
|
|
|
int
|
2010-07-08 05:13:01 +04:00
|
|
|
rip6_output(struct mbuf *m, struct socket * const so,
|
|
|
|
struct sockaddr_in6 * const dstsock, struct mbuf * const control)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
|
|
|
struct in6_addr *dst;
|
|
|
|
struct ip6_hdr *ip6;
|
|
|
|
struct in6pcb *in6p;
|
|
|
|
u_int plen = m->m_pkthdr.len;
|
|
|
|
int error = 0;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
struct ip6_pktopts opt, *optp = NULL;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct ifnet *oifp = NULL;
|
1999-12-13 18:17:17 +03:00
|
|
|
int type, code; /* for ICMPv6 output statistics only */
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
int scope_ambiguous = 0;
|
|
|
|
struct in6_addr *in6a;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
in6p = sotoin6pcb(so);
|
|
|
|
|
|
|
|
dst = &dstsock->sin6_addr;
|
|
|
|
if (control) {
|
2006-05-05 04:03:21 +04:00
|
|
|
if ((error = ip6_setpktopts(control, &opt,
|
|
|
|
in6p->in6p_outputopts,
|
2009-05-07 01:41:59 +04:00
|
|
|
kauth_cred_get(), so->so_proto->pr_protocol)) != 0) {
|
1999-06-28 10:36:47 +04:00
|
|
|
goto bad;
|
2006-05-05 04:03:21 +04:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
optp = &opt;
|
|
|
|
} else
|
|
|
|
optp = in6p->in6p_outputopts;
|
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/*
|
|
|
|
* Check and convert scope zone ID into internal form.
|
|
|
|
* XXX: we may still need to determine the zone later.
|
|
|
|
*/
|
|
|
|
if (!(so->so_state & SS_ISCONNECTED)) {
|
|
|
|
if (dstsock->sin6_scope_id == 0 && !ip6_use_defzone)
|
|
|
|
scope_ambiguous = 1;
|
|
|
|
if ((error = sa6_embedscope(dstsock, ip6_use_defzone)) != 0)
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
/*
|
|
|
|
* For an ICMPv6 packet, we should know its type and code
|
|
|
|
* to update statistics.
|
|
|
|
*/
|
|
|
|
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
|
|
|
|
struct icmp6_hdr *icmp6;
|
|
|
|
if (m->m_len < sizeof(struct icmp6_hdr) &&
|
|
|
|
(m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) {
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
icmp6 = mtod(m, struct icmp6_hdr *);
|
|
|
|
type = icmp6->icmp6_type;
|
|
|
|
code = icmp6->icmp6_code;
|
2003-10-25 12:26:14 +04:00
|
|
|
} else {
|
|
|
|
type = 0;
|
|
|
|
code = 0;
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
|
|
|
|
2003-05-28 02:36:38 +04:00
|
|
|
M_PREPEND(m, sizeof(*ip6), M_DONTWAIT);
|
|
|
|
if (!m) {
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto bad;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Next header might not be ICMP6 but use its pseudo header anyway.
|
|
|
|
*/
|
|
|
|
ip6->ip6_dst = *dst;
|
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
/*
|
|
|
|
* Source address selection.
|
|
|
|
*/
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
if ((in6a = in6_selectsrc(dstsock, optp, in6p->in6p_moptions,
|
2010-07-08 05:22:28 +04:00
|
|
|
&in6p->in6p_route, &in6p->in6p_laddr, &oifp,
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
&error)) == 0) {
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
if (error == 0)
|
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
ip6->ip6_src = *in6a;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
if (oifp && scope_ambiguous) {
|
|
|
|
/*
|
|
|
|
* Application should provide a proper zone ID or the use of
|
|
|
|
* default zone IDs should be enabled. Unfortunately, some
|
|
|
|
* applications do not behave as it should, so we need a
|
|
|
|
* workaround. Even if an appropriate ID is not determined
|
|
|
|
* (when it's required), if we can determine the outgoing
|
|
|
|
* interface. determine the zone ID based on the interface.
|
|
|
|
*/
|
|
|
|
error = in6_setscope(&dstsock->sin6_addr, oifp, NULL);
|
|
|
|
if (error != 0)
|
1999-06-28 10:36:47 +04:00
|
|
|
goto bad;
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
ip6->ip6_dst = dstsock->sin6_addr;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/* fill in the rest of the IPv6 header fields */
|
1999-06-28 10:36:47 +04:00
|
|
|
ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK;
|
2000-02-26 12:09:17 +03:00
|
|
|
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
|
|
|
|
ip6->ip6_vfc |= IPV6_VERSION;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/* ip6_plen will be filled in ip6_output, so not fill it here. */
|
1999-06-28 10:36:47 +04:00
|
|
|
ip6->ip6_nxt = in6p->in6p_ip6.ip6_nxt;
|
1999-12-13 18:17:17 +03:00
|
|
|
ip6->ip6_hlim = in6_selecthlim(in6p, oifp);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
|
|
|
|
in6p->in6p_cksum != -1) {
|
|
|
|
int off;
|
2004-07-23 13:53:10 +04:00
|
|
|
u_int16_t sum;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* compute checksum */
|
|
|
|
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
|
|
|
|
off = offsetof(struct icmp6_hdr, icmp6_cksum);
|
|
|
|
else
|
|
|
|
off = in6p->in6p_cksum;
|
|
|
|
if (plen < off + 1) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
off += sizeof(struct ip6_hdr);
|
|
|
|
|
2004-09-06 14:05:14 +04:00
|
|
|
sum = 0;
|
2007-03-04 08:59:00 +03:00
|
|
|
m = m_copyback_cow(m, off, sizeof(sum), (void *)&sum,
|
2004-09-06 14:05:14 +04:00
|
|
|
M_DONTWAIT);
|
|
|
|
if (m == NULL) {
|
2004-07-22 09:26:46 +04:00
|
|
|
error = ENOBUFS;
|
|
|
|
goto bad;
|
|
|
|
}
|
2004-07-23 13:53:10 +04:00
|
|
|
sum = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
|
2007-03-04 08:59:00 +03:00
|
|
|
m = m_copyback_cow(m, off, sizeof(sum), (void *)&sum,
|
2004-09-06 14:05:14 +04:00
|
|
|
M_DONTWAIT);
|
|
|
|
if (m == NULL) {
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto bad;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2006-05-05 04:03:21 +04:00
|
|
|
error = ip6_output(m, optp, &in6p->in6p_route, 0,
|
2003-08-23 01:53:01 +04:00
|
|
|
in6p->in6p_moptions, so, &oifp);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
|
|
|
|
if (oifp)
|
|
|
|
icmp6_ifoutstat_inc(oifp, type, code);
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
|
2001-10-18 13:12:13 +04:00
|
|
|
} else
|
2008-04-15 09:13:37 +04:00
|
|
|
RIP6_STATINC(RIP6_STAT_OPACKETS);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
goto freectl;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
if (m)
|
|
|
|
m_freem(m);
|
|
|
|
|
|
|
|
freectl:
|
2006-05-05 04:03:21 +04:00
|
|
|
if (control) {
|
|
|
|
ip6_clearpktopts(&opt, -1);
|
1999-06-28 10:36:47 +04:00
|
|
|
m_freem(control);
|
2006-05-05 04:03:21 +04:00
|
|
|
}
|
2007-02-22 12:30:33 +03:00
|
|
|
return error;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2001-12-18 06:04:02 +03:00
|
|
|
/*
|
1999-06-28 10:36:47 +04:00
|
|
|
* Raw IPv6 socket option processing.
|
2001-12-18 06:04:02 +03:00
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
int
|
2008-08-06 19:01:23 +04:00
|
|
|
rip6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
|
2008-08-06 19:01:23 +04:00
|
|
|
if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) {
|
|
|
|
int optval;
|
|
|
|
|
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that
tells a socket that it should both add a protocol header to tx'd
datagrams and remove the header from rx'd datagrams:
int onoff = 1, s = socket(...);
setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff);
2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4
sockets.
3) Reorganize the protocols' pr_ctloutput implementations a bit.
Consistently return ENOPROTOOPT when an option is unsupported,
and EINVAL if a supported option's arguments are incorrect.
Reorganize the flow of code so that it's more clear how/when
options are passed down the stack until they are handled.
Shorten some pr_ctloutput staircases for readability.
4) Extract common mbuf code into subroutines, add new sockaddr
methods, and introduce a new subroutine, fsocreate(), for reuse
later; use it first in sys_socket():
struct mbuf *m_getsombuf(struct socket *so)
Create an mbuf and make its owner the socket `so'.
struct mbuf *m_intopt(struct socket *so, int val)
Create an mbuf, make its owner the socket `so', put the
int `val' into it, and set its length to sizeof(int).
int fsocreate(..., int *fd)
Create a socket, a la socreate(9), put the socket into the
given LWP's descriptor table, return the descriptor at `fd'
on success.
void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
Extract a pointer to the address part of a sockaddr. Write
the length of the address part at `slenp', if `slenp' is
not NULL.
socklen_t sockaddr_getlen(const struct sockaddr *sa)
Return the length of a sockaddr. This just evaluates to
sa->sa_len. I only add this for consistency with code that
appears in a portable userland library that I am going to
import.
const struct sockaddr *sockaddr_any(const struct sockaddr *sa)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
2007-09-19 08:33:42 +04:00
|
|
|
/* need to fiddle w/ opt(IPPROTO_IPV6, IPV6_CHECKSUM)? */
|
2007-11-07 02:50:41 +03:00
|
|
|
if (op == PRCO_GETOPT) {
|
2008-08-06 19:01:23 +04:00
|
|
|
optval = 1;
|
|
|
|
error = sockopt_set(sopt, &optval, sizeof(optval));
|
|
|
|
} else if (op == PRCO_SETOPT) {
|
|
|
|
error = sockopt_getint(sopt, &optval);
|
|
|
|
if (error)
|
|
|
|
goto out;
|
|
|
|
if (optval == 0)
|
|
|
|
error = EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
goto out;
|
|
|
|
} else if (sopt->sopt_level != IPPROTO_IPV6)
|
|
|
|
return ip6_ctloutput(op, so, sopt);
|
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that
tells a socket that it should both add a protocol header to tx'd
datagrams and remove the header from rx'd datagrams:
int onoff = 1, s = socket(...);
setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff);
2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4
sockets.
3) Reorganize the protocols' pr_ctloutput implementations a bit.
Consistently return ENOPROTOOPT when an option is unsupported,
and EINVAL if a supported option's arguments are incorrect.
Reorganize the flow of code so that it's more clear how/when
options are passed down the stack until they are handled.
Shorten some pr_ctloutput staircases for readability.
4) Extract common mbuf code into subroutines, add new sockaddr
methods, and introduce a new subroutine, fsocreate(), for reuse
later; use it first in sys_socket():
struct mbuf *m_getsombuf(struct socket *so)
Create an mbuf and make its owner the socket `so'.
struct mbuf *m_intopt(struct socket *so, int val)
Create an mbuf, make its owner the socket `so', put the
int `val' into it, and set its length to sizeof(int).
int fsocreate(..., int *fd)
Create a socket, a la socreate(9), put the socket into the
given LWP's descriptor table, return the descriptor at `fd'
on success.
void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
Extract a pointer to the address part of a sockaddr. Write
the length of the address part at `slenp', if `slenp' is
not NULL.
socklen_t sockaddr_getlen(const struct sockaddr *sa)
Return the length of a sockaddr. This just evaluates to
sa->sa_len. I only add this for consistency with code that
appears in a portable userland library that I am going to
import.
const struct sockaddr *sockaddr_any(const struct sockaddr *sa)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
2007-09-19 08:33:42 +04:00
|
|
|
|
2008-08-06 19:01:23 +04:00
|
|
|
switch (sopt->sopt_name) {
|
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that
tells a socket that it should both add a protocol header to tx'd
datagrams and remove the header from rx'd datagrams:
int onoff = 1, s = socket(...);
setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff);
2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4
sockets.
3) Reorganize the protocols' pr_ctloutput implementations a bit.
Consistently return ENOPROTOOPT when an option is unsupported,
and EINVAL if a supported option's arguments are incorrect.
Reorganize the flow of code so that it's more clear how/when
options are passed down the stack until they are handled.
Shorten some pr_ctloutput staircases for readability.
4) Extract common mbuf code into subroutines, add new sockaddr
methods, and introduce a new subroutine, fsocreate(), for reuse
later; use it first in sys_socket():
struct mbuf *m_getsombuf(struct socket *so)
Create an mbuf and make its owner the socket `so'.
struct mbuf *m_intopt(struct socket *so, int val)
Create an mbuf, make its owner the socket `so', put the
int `val' into it, and set its length to sizeof(int).
int fsocreate(..., int *fd)
Create a socket, a la socreate(9), put the socket into the
given LWP's descriptor table, return the descriptor at `fd'
on success.
void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
Extract a pointer to the address part of a sockaddr. Write
the length of the address part at `slenp', if `slenp' is
not NULL.
socklen_t sockaddr_getlen(const struct sockaddr *sa)
Return the length of a sockaddr. This just evaluates to
sa->sa_len. I only add this for consistency with code that
appears in a portable userland library that I am going to
import.
const struct sockaddr *sockaddr_any(const struct sockaddr *sa)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
2007-09-19 08:33:42 +04:00
|
|
|
case MRT6_INIT:
|
|
|
|
case MRT6_DONE:
|
|
|
|
case MRT6_ADD_MIF:
|
|
|
|
case MRT6_DEL_MIF:
|
|
|
|
case MRT6_ADD_MFC:
|
|
|
|
case MRT6_DEL_MFC:
|
|
|
|
case MRT6_PIM:
|
|
|
|
if (op == PRCO_SETOPT)
|
2008-08-06 19:01:23 +04:00
|
|
|
error = ip6_mrouter_set(so, sopt);
|
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that
tells a socket that it should both add a protocol header to tx'd
datagrams and remove the header from rx'd datagrams:
int onoff = 1, s = socket(...);
setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff);
2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4
sockets.
3) Reorganize the protocols' pr_ctloutput implementations a bit.
Consistently return ENOPROTOOPT when an option is unsupported,
and EINVAL if a supported option's arguments are incorrect.
Reorganize the flow of code so that it's more clear how/when
options are passed down the stack until they are handled.
Shorten some pr_ctloutput staircases for readability.
4) Extract common mbuf code into subroutines, add new sockaddr
methods, and introduce a new subroutine, fsocreate(), for reuse
later; use it first in sys_socket():
struct mbuf *m_getsombuf(struct socket *so)
Create an mbuf and make its owner the socket `so'.
struct mbuf *m_intopt(struct socket *so, int val)
Create an mbuf, make its owner the socket `so', put the
int `val' into it, and set its length to sizeof(int).
int fsocreate(..., int *fd)
Create a socket, a la socreate(9), put the socket into the
given LWP's descriptor table, return the descriptor at `fd'
on success.
void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
Extract a pointer to the address part of a sockaddr. Write
the length of the address part at `slenp', if `slenp' is
not NULL.
socklen_t sockaddr_getlen(const struct sockaddr *sa)
Return the length of a sockaddr. This just evaluates to
sa->sa_len. I only add this for consistency with code that
appears in a portable userland library that I am going to
import.
const struct sockaddr *sockaddr_any(const struct sockaddr *sa)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
2007-09-19 08:33:42 +04:00
|
|
|
else if (op == PRCO_GETOPT)
|
2008-08-06 19:01:23 +04:00
|
|
|
error = ip6_mrouter_get(so, sopt);
|
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that
tells a socket that it should both add a protocol header to tx'd
datagrams and remove the header from rx'd datagrams:
int onoff = 1, s = socket(...);
setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff);
2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4
sockets.
3) Reorganize the protocols' pr_ctloutput implementations a bit.
Consistently return ENOPROTOOPT when an option is unsupported,
and EINVAL if a supported option's arguments are incorrect.
Reorganize the flow of code so that it's more clear how/when
options are passed down the stack until they are handled.
Shorten some pr_ctloutput staircases for readability.
4) Extract common mbuf code into subroutines, add new sockaddr
methods, and introduce a new subroutine, fsocreate(), for reuse
later; use it first in sys_socket():
struct mbuf *m_getsombuf(struct socket *so)
Create an mbuf and make its owner the socket `so'.
struct mbuf *m_intopt(struct socket *so, int val)
Create an mbuf, make its owner the socket `so', put the
int `val' into it, and set its length to sizeof(int).
int fsocreate(..., int *fd)
Create a socket, a la socreate(9), put the socket into the
given LWP's descriptor table, return the descriptor at `fd'
on success.
void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
Extract a pointer to the address part of a sockaddr. Write
the length of the address part at `slenp', if `slenp' is
not NULL.
socklen_t sockaddr_getlen(const struct sockaddr *sa)
Return the length of a sockaddr. This just evaluates to
sa->sa_len. I only add this for consistency with code that
appears in a portable userland library that I am going to
import.
const struct sockaddr *sockaddr_any(const struct sockaddr *sa)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
2007-09-19 08:33:42 +04:00
|
|
|
else
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
case IPV6_CHECKSUM:
|
2008-08-06 19:01:23 +04:00
|
|
|
return ip6_raw_ctloutput(op, so, sopt);
|
2001-02-08 21:43:17 +03:00
|
|
|
default:
|
2008-08-06 19:01:23 +04:00
|
|
|
return ip6_ctloutput(op, so, sopt);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2008-08-06 19:01:23 +04:00
|
|
|
out:
|
1) Introduce a new socket option, (SOL_SOCKET, SO_NOHEADER), that
tells a socket that it should both add a protocol header to tx'd
datagrams and remove the header from rx'd datagrams:
int onoff = 1, s = socket(...);
setsockopt(s, SOL_SOCKET, SO_NOHEADER, &onoff);
2) Add an implementation of (SOL_SOCKET, SO_NOHEADER) for raw IPv4
sockets.
3) Reorganize the protocols' pr_ctloutput implementations a bit.
Consistently return ENOPROTOOPT when an option is unsupported,
and EINVAL if a supported option's arguments are incorrect.
Reorganize the flow of code so that it's more clear how/when
options are passed down the stack until they are handled.
Shorten some pr_ctloutput staircases for readability.
4) Extract common mbuf code into subroutines, add new sockaddr
methods, and introduce a new subroutine, fsocreate(), for reuse
later; use it first in sys_socket():
struct mbuf *m_getsombuf(struct socket *so)
Create an mbuf and make its owner the socket `so'.
struct mbuf *m_intopt(struct socket *so, int val)
Create an mbuf, make its owner the socket `so', put the
int `val' into it, and set its length to sizeof(int).
int fsocreate(..., int *fd)
Create a socket, a la socreate(9), put the socket into the
given LWP's descriptor table, return the descriptor at `fd'
on success.
void *sockaddr_addr(struct sockaddr *sa, socklen_t *slenp)
const void *sockaddr_const_addr(const struct sockaddr *sa, socklen_t *slenp)
Extract a pointer to the address part of a sockaddr. Write
the length of the address part at `slenp', if `slenp' is
not NULL.
socklen_t sockaddr_getlen(const struct sockaddr *sa)
Return the length of a sockaddr. This just evaluates to
sa->sa_len. I only add this for consistency with code that
appears in a portable userland library that I am going to
import.
const struct sockaddr *sockaddr_any(const struct sockaddr *sa)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
const void *sockaddr_anyaddr(const struct sockaddr *sa, socklen_t *slenp)
Return the "don't care" sockaddr in the same family as
`sa'. This is the address a client should sobind(9) if it
does not care the source address and, if applicable, the
port et cetera that it uses.
2007-09-19 08:33:42 +04:00
|
|
|
return error;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
extern u_long rip6_sendspace;
|
|
|
|
extern u_long rip6_recvspace;
|
|
|
|
|
|
|
|
int
|
2007-05-23 21:14:59 +04:00
|
|
|
rip6_usrreq(struct socket *so, int req, struct mbuf *m,
|
|
|
|
struct mbuf *nam, struct mbuf *control, struct lwp *l)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2001-02-08 21:43:17 +03:00
|
|
|
struct in6pcb *in6p = sotoin6pcb(so);
|
1999-06-28 10:36:47 +04:00
|
|
|
int s;
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
if (req == PRU_CONTROL)
|
2007-03-04 08:59:00 +03:00
|
|
|
return in6_control(so, (u_long)m, (void *)nam,
|
2007-02-22 12:30:33 +03:00
|
|
|
(struct ifnet *)control, l);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2000-02-03 02:28:08 +03:00
|
|
|
if (req == PRU_PURGEIF) {
|
2008-04-24 15:38:36 +04:00
|
|
|
mutex_enter(softnet_lock);
|
2003-09-04 13:16:57 +04:00
|
|
|
in6_pcbpurgeif0(&raw6cbtable, (struct ifnet *)control);
|
2000-02-03 02:28:08 +03:00
|
|
|
in6_purgeif((struct ifnet *)control);
|
2003-09-04 13:16:57 +04:00
|
|
|
in6_pcbpurgeif(&raw6cbtable, (struct ifnet *)control);
|
2008-04-24 15:38:36 +04:00
|
|
|
mutex_exit(softnet_lock);
|
2007-02-22 12:30:33 +03:00
|
|
|
return 0;
|
2000-02-02 01:52:04 +03:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
switch (req) {
|
|
|
|
case PRU_ATTACH:
|
2009-05-07 01:41:59 +04:00
|
|
|
error = kauth_authorize_network(l->l_cred,
|
|
|
|
KAUTH_NETWORK_SOCKET, KAUTH_REQ_NETWORK_SOCKET_RAWSOCK,
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetimes Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable come from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
KAUTH_ARG(AF_INET6),
|
|
|
|
KAUTH_ARG(SOCK_RAW),
|
|
|
|
KAUTH_ARG(so->so_proto->pr_protocol));
|
2008-04-24 15:38:36 +04:00
|
|
|
sosetlock(so);
|
2007-02-22 12:30:33 +03:00
|
|
|
if (in6p != NULL)
|
1999-06-28 10:36:47 +04:00
|
|
|
panic("rip6_attach");
|
2009-05-07 01:41:59 +04:00
|
|
|
if (error) {
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
|
|
|
}
|
1999-07-04 06:01:15 +04:00
|
|
|
s = splsoftnet();
|
2007-02-22 12:30:33 +03:00
|
|
|
error = soreserve(so, rip6_sendspace, rip6_recvspace);
|
|
|
|
if (error != 0) {
|
2001-02-08 21:43:17 +03:00
|
|
|
splx(s);
|
|
|
|
break;
|
|
|
|
}
|
2007-02-22 12:30:33 +03:00
|
|
|
if ((error = in6_pcballoc(so, &raw6cbtable)) != 0) {
|
1999-06-28 10:36:47 +04:00
|
|
|
splx(s);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
splx(s);
|
|
|
|
in6p = sotoin6pcb(so);
|
1999-07-06 12:55:56 +04:00
|
|
|
in6p->in6p_ip6.ip6_nxt = (long)nam;
|
1999-06-28 10:36:47 +04:00
|
|
|
in6p->in6p_cksum = -1;
|
2002-06-09 18:43:10 +04:00
|
|
|
|
2008-12-17 23:51:31 +03:00
|
|
|
in6p->in6p_icmp6filt = malloc(sizeof(struct icmp6_filter),
|
|
|
|
M_PCB, M_NOWAIT);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (in6p->in6p_icmp6filt == NULL) {
|
|
|
|
in6_pcbdetach(in6p);
|
|
|
|
error = ENOMEM;
|
|
|
|
break;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
ICMP6_FILTER_SETPASSALL(in6p->in6p_icmp6filt);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRU_DISCONNECT:
|
|
|
|
if ((so->so_state & SS_ISCONNECTED) == 0) {
|
|
|
|
error = ENOTCONN;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
in6p->in6p_faddr = in6addr_any;
|
|
|
|
so->so_state &= ~SS_ISCONNECTED; /* XXX */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRU_ABORT:
|
|
|
|
soisdisconnected(so);
|
|
|
|
/* Fallthrough */
|
|
|
|
case PRU_DETACH:
|
2007-02-22 12:30:33 +03:00
|
|
|
if (in6p == NULL)
|
1999-06-28 10:36:47 +04:00
|
|
|
panic("rip6_detach");
|
|
|
|
if (so == ip6_mrouter)
|
|
|
|
ip6_mrouter_done();
|
|
|
|
/* xxx: RSVP */
|
2007-02-22 12:30:33 +03:00
|
|
|
if (in6p->in6p_icmp6filt != NULL) {
|
2008-12-17 23:51:31 +03:00
|
|
|
free(in6p->in6p_icmp6filt, M_PCB);
|
1999-06-28 10:36:47 +04:00
|
|
|
in6p->in6p_icmp6filt = NULL;
|
|
|
|
}
|
|
|
|
in6_pcbdetach(in6p);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRU_BIND:
|
|
|
|
{
|
|
|
|
struct sockaddr_in6 *addr = mtod(nam, struct sockaddr_in6 *);
|
|
|
|
struct ifaddr *ia = NULL;
|
|
|
|
|
|
|
|
if (nam->m_len != sizeof(*addr)) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
2006-12-02 21:59:17 +03:00
|
|
|
if (TAILQ_EMPTY(&ifnet) || addr->sin6_family != AF_INET6) {
|
2000-07-07 19:54:16 +04:00
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
if ((error = sa6_embedscope(addr, ip6_use_defzone)) != 0)
|
|
|
|
break;
|
2001-07-23 23:29:53 +04:00
|
|
|
|
2000-05-29 04:03:18 +04:00
|
|
|
/*
|
|
|
|
* we don't support mapped address here, it would confuse
|
|
|
|
* users so reject it
|
|
|
|
*/
|
|
|
|
if (IN6_IS_ADDR_V4MAPPED(&addr->sin6_addr)) {
|
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
2000-07-07 19:54:16 +04:00
|
|
|
if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
|
|
|
|
(ia = ifa_ifwithaddr((struct sockaddr *)addr)) == 0) {
|
1999-06-28 10:36:47 +04:00
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
2002-06-08 02:07:38 +04:00
|
|
|
if (ia && ((struct in6_ifaddr *)ia)->ia6_flags &
|
1999-06-28 10:36:47 +04:00
|
|
|
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
|
|
|
|
IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
|
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
in6p->in6p_laddr = addr->sin6_addr;
|
|
|
|
break;
|
|
|
|
}
|
2001-10-18 11:44:33 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
case PRU_CONNECT:
|
2001-12-18 06:04:02 +03:00
|
|
|
{
|
1999-06-28 10:36:47 +04:00
|
|
|
struct sockaddr_in6 *addr = mtod(nam, struct sockaddr_in6 *);
|
|
|
|
struct in6_addr *in6a = NULL;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
struct ifnet *ifp = NULL;
|
|
|
|
int scope_ambiguous = 0;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
if (nam->m_len != sizeof(*addr)) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
2006-12-02 21:59:17 +03:00
|
|
|
if (TAILQ_EMPTY(&ifnet)) {
|
1999-06-28 10:36:47 +04:00
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (addr->sin6_family != AF_INET6) {
|
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/*
|
|
|
|
* Application should provide a proper zone ID or the use of
|
|
|
|
* default zone IDs should be enabled. Unfortunately, some
|
|
|
|
* applications do not behave as it should, so we need a
|
|
|
|
* workaround. Even if an appropriate ID is not determined,
|
|
|
|
* we'll see if we can determine the outgoing interface. If we
|
|
|
|
* can, determine the zone ID based on the interface below.
|
|
|
|
*/
|
|
|
|
if (addr->sin6_scope_id == 0 && !ip6_use_defzone)
|
|
|
|
scope_ambiguous = 1;
|
|
|
|
if ((error = sa6_embedscope(addr, ip6_use_defzone)) != 0)
|
2007-02-22 12:30:33 +03:00
|
|
|
return error;
|
2000-07-07 19:54:16 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/* Source address selection. XXX: need pcblookup? */
|
1999-12-13 18:17:17 +03:00
|
|
|
in6a = in6_selectsrc(addr, in6p->in6p_outputopts,
|
2010-07-08 05:22:28 +04:00
|
|
|
in6p->in6p_moptions, &in6p->in6p_route,
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
&in6p->in6p_laddr, &ifp, &error);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (in6a == NULL) {
|
1999-06-28 10:36:47 +04:00
|
|
|
if (error == 0)
|
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/* XXX: see above */
|
|
|
|
if (ifp && scope_ambiguous &&
|
|
|
|
(error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) {
|
|
|
|
break;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
in6p->in6p_laddr = *in6a;
|
|
|
|
in6p->in6p_faddr = addr->sin6_addr;
|
|
|
|
soisconnected(so);
|
|
|
|
break;
|
2001-12-18 06:04:02 +03:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
case PRU_CONNECT2:
|
|
|
|
error = EOPNOTSUPP;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mark the connection as being incapable of futther input.
|
|
|
|
*/
|
|
|
|
case PRU_SHUTDOWN:
|
|
|
|
socantsendmore(so);
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* Ship a packet out. The appropriate raw output
|
|
|
|
* routine handles any messaging necessary.
|
|
|
|
*/
|
|
|
|
case PRU_SEND:
|
2001-12-18 06:04:02 +03:00
|
|
|
{
|
1999-06-28 10:36:47 +04:00
|
|
|
struct sockaddr_in6 tmp;
|
|
|
|
struct sockaddr_in6 *dst;
|
|
|
|
|
2000-07-07 19:54:16 +04:00
|
|
|
/* always copy sockaddr to avoid overwrites */
|
1999-06-28 10:36:47 +04:00
|
|
|
if (so->so_state & SS_ISCONNECTED) {
|
|
|
|
if (nam) {
|
|
|
|
error = EISCONN;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* XXX */
|
2007-11-07 02:40:38 +03:00
|
|
|
sockaddr_in6_init(&tmp, &in6p->in6p_faddr, 0, 0, 0);
|
1999-06-28 10:36:47 +04:00
|
|
|
dst = &tmp;
|
|
|
|
} else {
|
|
|
|
if (nam == NULL) {
|
|
|
|
error = ENOTCONN;
|
|
|
|
break;
|
|
|
|
}
|
2002-03-19 04:21:19 +03:00
|
|
|
if (nam->m_len != sizeof(tmp)) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2000-07-07 19:54:16 +04:00
|
|
|
tmp = *mtod(nam, struct sockaddr_in6 *);
|
|
|
|
dst = &tmp;
|
2002-03-19 04:21:19 +03:00
|
|
|
|
|
|
|
if (dst->sin6_family != AF_INET6) {
|
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
break;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
error = rip6_output(m, so, dst, control);
|
|
|
|
m = NULL;
|
|
|
|
break;
|
2001-12-18 06:04:02 +03:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
case PRU_SENSE:
|
|
|
|
/*
|
|
|
|
* stat: don't bother with a blocksize
|
|
|
|
*/
|
2007-02-22 12:30:33 +03:00
|
|
|
return 0;
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Not supported.
|
|
|
|
*/
|
|
|
|
case PRU_RCVOOB:
|
|
|
|
case PRU_RCVD:
|
|
|
|
case PRU_LISTEN:
|
|
|
|
case PRU_ACCEPT:
|
|
|
|
case PRU_SENDOOB:
|
|
|
|
error = EOPNOTSUPP;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRU_SOCKADDR:
|
|
|
|
in6_setsockaddr(in6p, nam);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRU_PEERADDR:
|
|
|
|
in6_setpeeraddr(in6p, nam);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("rip6_usrreq");
|
|
|
|
}
|
|
|
|
if (m != NULL)
|
|
|
|
m_freem(m);
|
2007-02-22 12:30:33 +03:00
|
|
|
return error;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2005-03-09 08:07:19 +03:00
|
|
|
|
2008-04-15 09:13:37 +04:00
|
|
|
static int
|
|
|
|
sysctl_net_inet6_raw6_stats(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
|
2008-05-04 11:22:14 +04:00
|
|
|
return (NETSTAT_SYSCTL(rip6stat_percpu, RIP6_NSTATS));
|
2008-04-15 09:13:37 +04:00
|
|
|
}
|
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
static void
|
|
|
|
sysctl_net_inet6_raw6_setup(struct sysctllog **clog)
|
2005-03-09 08:07:19 +03:00
|
|
|
{
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "net", NULL,
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "inet6", NULL,
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET6, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "raw6",
|
|
|
|
SYSCTL_DESCR("Raw IPv6 settings"),
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_RAW, CTL_EOL);
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2005-03-11 09:16:15 +03:00
|
|
|
CTLTYPE_STRUCT, "pcblist",
|
2005-03-09 08:07:19 +03:00
|
|
|
SYSCTL_DESCR("Raw IPv6 control block list"),
|
|
|
|
sysctl_inpcblist, 0, &raw6cbtable, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_RAW,
|
|
|
|
CTL_CREATE, CTL_EOL);
|
2005-08-29 01:04:09 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_STRUCT, "stats",
|
|
|
|
SYSCTL_DESCR("Raw IPv6 statistics"),
|
2008-04-15 09:13:37 +04:00
|
|
|
sysctl_net_inet6_raw6_stats, 0, NULL, 0,
|
2005-08-29 01:04:09 +04:00
|
|
|
CTL_NET, PF_INET6, IPPROTO_RAW, RAW6CTL_STATS,
|
|
|
|
CTL_EOL);
|
2005-03-09 08:07:19 +03:00
|
|
|
}
|