2021-04-04 01:28:12 +03:00
|
|
|
/* $NetBSD: icmp6.c,v 1.247.2.1 2021/04/03 22:29:02 thorpej Exp $ */
|
2001-06-22 17:01:49 +04:00
|
|
|
/* $KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $ */
|
1999-07-04 01:24:45 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
|
|
|
* All rights reserved.
|
2000-05-09 15:51:12 +04:00
|
|
|
*
|
1999-06-28 10:36:47 +04:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the project nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2000-05-09 15:51:12 +04:00
|
|
|
*
|
1999-06-28 10:36:47 +04:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (c) 1982, 1986, 1988, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 20:26:28 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1999-06-28 10:36:47 +04:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
|
|
|
|
*/
|
|
|
|
|
2001-11-13 03:56:55 +03:00
|
|
|
#include <sys/cdefs.h>
|
2021-04-04 01:28:12 +03:00
|
|
|
__KERNEL_RCSID(0, "$NetBSD: icmp6.c,v 1.247.2.1 2021/04/03 22:29:02 thorpej Exp $");
|
2001-11-13 03:56:55 +03:00
|
|
|
|
2015-08-25 01:21:26 +03:00
|
|
|
#ifdef _KERNEL_OPT
|
2020-06-12 14:04:44 +03:00
|
|
|
#include "opt_compat_netbsd.h"
|
1999-06-28 10:36:47 +04:00
|
|
|
#include "opt_inet.h"
|
1999-07-10 02:57:15 +04:00
|
|
|
#include "opt_ipsec.h"
|
2015-08-25 01:21:26 +03:00
|
|
|
#endif
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
2014-05-19 06:51:24 +04:00
|
|
|
#include <sys/kmem.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/protosw.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/syslog.h>
|
1999-12-13 18:17:17 +03:00
|
|
|
#include <sys/domain.h>
|
2001-10-29 10:02:30 +03:00
|
|
|
#include <sys/sysctl.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
#include <net/if_dl.h>
|
|
|
|
#include <net/if_types.h>
|
2020-09-11 18:03:33 +03:00
|
|
|
#include <net/nd.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_var.h>
|
2000-02-06 15:49:37 +03:00
|
|
|
#include <netinet/ip6.h>
|
2017-02-02 05:52:10 +03:00
|
|
|
#include <netinet/wqinput.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <netinet6/ip6_var.h>
|
2008-04-15 07:57:04 +04:00
|
|
|
#include <netinet6/ip6_private.h>
|
2000-02-06 15:49:37 +03:00
|
|
|
#include <netinet/icmp6.h>
|
2008-04-15 07:57:04 +04:00
|
|
|
#include <netinet6/icmp6_private.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
#include <netinet6/mld6_var.h>
|
|
|
|
#include <netinet6/in6_pcb.h>
|
|
|
|
#include <netinet6/in6_ifattach.h>
|
1999-07-31 22:41:15 +04:00
|
|
|
#include <netinet6/ip6protosw.h>
|
2020-09-11 18:03:33 +03:00
|
|
|
#include <netinet6/nd6.h>
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
#include <netinet6/scope6_var.h>
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2013-06-05 23:01:26 +04:00
|
|
|
#ifdef IPSEC
|
2007-02-10 12:43:05 +03:00
|
|
|
#include <netipsec/ipsec.h>
|
2017-08-02 05:18:17 +03:00
|
|
|
#include <netipsec/ipsec6.h>
|
2007-02-10 12:43:05 +03:00
|
|
|
#include <netipsec/key.h>
|
|
|
|
#endif
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
#include "faith.h"
|
2001-05-08 14:15:13 +04:00
|
|
|
#if defined(NFAITH) && 0 < NFAITH
|
|
|
|
#include <net/if_faith.h>
|
|
|
|
#endif
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2020-07-27 17:52:55 +03:00
|
|
|
/* Ensure that non packed structures are the desired size. */
|
|
|
|
__CTASSERT(sizeof(struct icmp6_hdr) == 8);
|
|
|
|
__CTASSERT(sizeof(struct icmp6_nodeinfo) == 16);
|
|
|
|
__CTASSERT(sizeof(struct icmp6_namelookup) == 20);
|
|
|
|
__CTASSERT(sizeof(struct icmp6_router_renum) == 16);
|
|
|
|
|
|
|
|
__CTASSERT(sizeof(struct nd_router_solicit) == 8);
|
|
|
|
__CTASSERT(sizeof(struct nd_router_advert) == 16);
|
|
|
|
__CTASSERT(sizeof(struct nd_neighbor_solicit) == 24);
|
|
|
|
__CTASSERT(sizeof(struct nd_neighbor_advert) == 24);
|
|
|
|
__CTASSERT(sizeof(struct nd_redirect) == 40);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_hdr) == 2);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_route_info) == 8);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_prefix_info) == 32);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_rd_hdr) == 8);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_mtu) == 8);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_nonce) == 2 + ND_OPT_NONCE_LEN);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_rdnss) == 8);
|
|
|
|
__CTASSERT(sizeof(struct nd_opt_dnssl) == 8);
|
|
|
|
|
|
|
|
__CTASSERT(sizeof(struct mld_hdr) == 24);
|
|
|
|
__CTASSERT(sizeof(struct ni_reply_fqdn) == 8);
|
|
|
|
__CTASSERT(sizeof(struct rr_pco_match) == 24);
|
|
|
|
__CTASSERT(sizeof(struct rr_pco_use) == 32);
|
|
|
|
__CTASSERT(sizeof(struct rr_result) == 24);
|
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
extern struct domain inet6domain;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2008-04-15 07:57:04 +04:00
|
|
|
percpu_t *icmp6stat_percpu;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-09-04 13:16:57 +04:00
|
|
|
extern struct inpcbtable raw6cbtable;
|
2000-07-06 16:36:18 +04:00
|
|
|
extern int icmp6errppslim;
|
|
|
|
static int icmp6errpps_count = 0;
|
2000-07-09 10:44:57 +04:00
|
|
|
static struct timeval icmp6errppslim_last;
|
2000-01-02 19:31:17 +03:00
|
|
|
extern int icmp6_nodeinfo;
|
2000-10-19 01:14:12 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* List of callbacks to notify when Path MTU changes are made.
|
|
|
|
*/
|
|
|
|
struct icmp6_mtudisc_callback {
|
|
|
|
LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
|
2007-11-01 23:33:56 +03:00
|
|
|
void (*mc_func)(struct in6_addr *);
|
2000-10-19 01:14:12 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
|
|
|
|
LIST_HEAD_INITIALIZER(&icmp6_mtudisc_callbacks);
|
|
|
|
|
1999-07-22 16:56:56 +04:00
|
|
|
static struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
|
|
|
|
extern int pmtu_expire;
|
|
|
|
|
2000-12-09 04:29:45 +03:00
|
|
|
/* XXX do these values make any sense? */
|
2001-02-08 19:07:39 +03:00
|
|
|
static int icmp6_mtudisc_hiwat = 1280;
|
|
|
|
static int icmp6_mtudisc_lowat = 256;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* keep track of # of redirect routes.
|
|
|
|
*/
|
|
|
|
static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
|
|
|
|
|
|
|
|
/* XXX experimental, turned off */
|
|
|
|
static int icmp6_redirect_hiwat = -1;
|
|
|
|
static int icmp6_redirect_lowat = -1;
|
2000-12-09 04:29:45 +03:00
|
|
|
|
2017-02-13 10:18:20 +03:00
|
|
|
/* Protect mtudisc and redirect stuffs */
|
|
|
|
static kmutex_t icmp6_mtx __cacheline_aligned;
|
|
|
|
|
2008-04-15 07:57:04 +04:00
|
|
|
static void icmp6_errcount(u_int, int, int);
|
2007-11-01 23:33:56 +03:00
|
|
|
static int icmp6_rip6_input(struct mbuf **, int);
|
2018-01-23 13:55:38 +03:00
|
|
|
static void icmp6_reflect(struct mbuf *, size_t);
|
2007-11-01 23:33:56 +03:00
|
|
|
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
|
2017-01-16 10:33:36 +03:00
|
|
|
static const char *icmp6_redirect_diag(char *, size_t, struct in6_addr *,
|
2018-01-23 13:55:38 +03:00
|
|
|
struct in6_addr *, struct in6_addr *);
|
|
|
|
static void icmp6_redirect_input(struct mbuf *, int);
|
2007-11-01 23:33:56 +03:00
|
|
|
static struct mbuf *ni6_input(struct mbuf *, int);
|
|
|
|
static struct mbuf *ni6_nametodns(const char *, int, int);
|
|
|
|
static int ni6_dnsmatch(const char *, int, const char *, int);
|
2018-01-23 13:55:38 +03:00
|
|
|
static int ni6_addrs(struct icmp6_nodeinfo *, struct ifnet **, char *,
|
|
|
|
struct psref *);
|
2007-11-01 23:33:56 +03:00
|
|
|
static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
|
2018-01-23 13:55:38 +03:00
|
|
|
struct ifnet *, int);
|
2007-11-01 23:33:56 +03:00
|
|
|
static int icmp6_notify_error(struct mbuf *, int, int, int);
|
|
|
|
static struct rtentry *icmp6_mtudisc_clone(struct sockaddr *);
|
|
|
|
static void icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
|
|
|
|
static void icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
|
2009-09-16 19:23:04 +04:00
|
|
|
static void sysctl_net_inet6_icmp6_setup(struct sysctllog **);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2017-02-02 05:52:10 +03:00
|
|
|
/* workqueue-based pr_input */
|
|
|
|
static struct wqinput *icmp6_wqinput;
|
|
|
|
static void _icmp6_input(struct mbuf *m, int off, int proto);
|
2006-04-15 04:24:12 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
 * One-time initialization of the ICMPv6 subsystem: sysctl tree, MLD,
 * the mutex guarding the PMTU/redirect machinery, the two rttimer
 * queues, per-CPU statistics, and the workqueue-based input path.
 * Called once during IPv6 protocol attach.
 */
void
icmp6_init(void)
{

	sysctl_net_inet6_icmp6_setup(NULL);
	mld_init();

	/*
	 * icmp6_mtx protects the mtudisc and redirect state; take it
	 * while creating the timer queues so they are never visible
	 * half-initialized.
	 */
	mutex_init(&icmp6_mtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&icmp6_mtx);
	icmp6_mtudisc_timeout_q = rt_timer_queue_create(pmtu_expire);
	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
	mutex_exit(&icmp6_mtx);

	/* Per-CPU counters, read via ICMP6_STATINC/sysctl. */
	icmp6stat_percpu = percpu_alloc(sizeof(uint64_t) * ICMP6_NSTATS);

	/* Defer actual input processing to a workqueue (softint context). */
	icmp6_wqinput = wqinput_create("icmp6", _icmp6_input);
}
|
|
|
|
|
2000-07-06 16:36:18 +04:00
|
|
|
static void
|
2008-04-15 07:57:04 +04:00
|
|
|
icmp6_errcount(u_int base, int type, int code)
|
2000-07-06 16:36:18 +04:00
|
|
|
{
|
2001-02-10 07:14:26 +03:00
|
|
|
switch (type) {
|
2000-07-06 16:36:18 +04:00
|
|
|
case ICMP6_DST_UNREACH:
|
|
|
|
switch (code) {
|
|
|
|
case ICMP6_DST_UNREACH_NOROUTE:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_NOROUTE);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_DST_UNREACH_ADMIN:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_ADMIN);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_DST_UNREACH_BEYONDSCOPE:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base +
|
|
|
|
ICMP6_ERRSTAT_DST_UNREACH_BEYONDSCOPE);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_DST_UNREACH_ADDR:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_ADDR);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_DST_UNREACH_NOPORT:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_DST_UNREACH_NOPORT);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ICMP6_PACKET_TOO_BIG:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_PACKET_TOO_BIG);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_TIME_EXCEEDED:
|
2001-02-10 07:14:26 +03:00
|
|
|
switch (code) {
|
2000-07-06 16:36:18 +04:00
|
|
|
case ICMP6_TIME_EXCEED_TRANSIT:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_TIME_EXCEED_TRANSIT);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_TIME_EXCEED_REASSEMBLY:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base +
|
|
|
|
ICMP6_ERRSTAT_TIME_EXCEED_REASSEMBLY);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ICMP6_PARAM_PROB:
|
2001-02-10 07:14:26 +03:00
|
|
|
switch (code) {
|
2000-07-06 16:36:18 +04:00
|
|
|
case ICMP6_PARAMPROB_HEADER:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_PARAMPROB_HEADER);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_PARAMPROB_NEXTHEADER:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base +
|
|
|
|
ICMP6_ERRSTAT_PARAMPROB_NEXTHEADER);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
case ICMP6_PARAMPROB_OPTION:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_PARAMPROB_OPTION);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ND_REDIRECT:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_REDIRECT);
|
2000-07-06 16:36:18 +04:00
|
|
|
return;
|
|
|
|
}
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(base + ICMP6_ERRSTAT_UNKNOWN);
|
2000-07-06 16:36:18 +04:00
|
|
|
}
|
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
/*
|
|
|
|
* Register a Path MTU Discovery callback.
|
|
|
|
*/
|
|
|
|
void
|
2007-11-01 23:33:56 +03:00
|
|
|
icmp6_mtudisc_callback_register(void (*func)(struct in6_addr *))
|
2000-10-19 01:14:12 +04:00
|
|
|
{
|
2017-02-13 10:18:20 +03:00
|
|
|
struct icmp6_mtudisc_callback *mc, *new;
|
|
|
|
|
|
|
|
new = kmem_alloc(sizeof(*mc), KM_SLEEP);
|
2000-10-19 01:14:12 +04:00
|
|
|
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_enter(&icmp6_mtx);
|
2000-10-19 01:14:12 +04:00
|
|
|
for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
|
|
|
|
mc = LIST_NEXT(mc, mc_list)) {
|
2017-02-13 10:18:20 +03:00
|
|
|
if (mc->mc_func == func) {
|
|
|
|
mutex_exit(&icmp6_mtx);
|
|
|
|
kmem_free(new, sizeof(*mc));
|
2000-10-19 01:14:12 +04:00
|
|
|
return;
|
2017-02-13 10:18:20 +03:00
|
|
|
}
|
2000-10-19 01:14:12 +04:00
|
|
|
}
|
|
|
|
|
2017-02-13 10:18:20 +03:00
|
|
|
new->mc_func = func;
|
|
|
|
LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, new, mc_list);
|
|
|
|
mutex_exit(&icmp6_mtx);
|
2000-10-19 01:14:12 +04:00
|
|
|
}
|
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/*
|
|
|
|
* A wrapper function for icmp6_error() necessary when the erroneous packet
|
|
|
|
* may not contain enough scope zone information.
|
|
|
|
*/
|
|
|
|
void
|
2018-01-23 13:55:38 +03:00
|
|
|
icmp6_error2(struct mbuf *m, int type, int code, int param,
|
2020-03-10 00:20:55 +03:00
|
|
|
struct ifnet *ifp, struct in6_addr *src)
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
{
|
|
|
|
struct ip6_hdr *ip6;
|
|
|
|
|
2018-01-23 10:02:57 +03:00
|
|
|
KASSERT(ifp != NULL);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
|
|
|
|
if (m->m_len < sizeof(struct ip6_hdr)) {
|
|
|
|
m = m_pullup(m, sizeof(struct ip6_hdr));
|
|
|
|
if (m == NULL)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
|
|
|
|
if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
|
2018-01-23 10:02:57 +03:00
|
|
|
goto out;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
|
2018-01-23 10:02:57 +03:00
|
|
|
goto out;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
|
2020-03-10 00:20:55 +03:00
|
|
|
*src = ip6->ip6_src;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
icmp6_error(m, type, code, param);
|
2018-01-23 10:02:57 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
out:
|
|
|
|
m_freem(m);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
 * Generate an error packet of type error in response to bad IP6 packet.
 *
 * Consumes m on every path (either reflected as an ICMPv6 error or
 * freed).  param is type-dependent: the MTU for Packet Too Big, the
 * offending-octet offset for Parameter Problem, 0 otherwise.
 */
void
icmp6_error(struct mbuf *m, int type, int code, int param)
{
	struct ip6_hdr *oip6, *nip6;
	struct icmp6_hdr *icmp6;
	u_int preplen;
	int off;
	int nxt;

	ICMP6_STATINC(ICMP6_STAT_ERROR);

	/* count per-type-code statistics */
	icmp6_errcount(ICMP6_STAT_OUTERRHIST, type, code);

	/*
	 * Don't report errors on packets that arrived under IPsec
	 * protection; doing so could leak information about the inner
	 * packet.
	 */
	if (m->m_flags & M_DECRYPTED) {
		ICMP6_STATINC(ICMP6_STAT_CANTERROR);
		goto freeit;
	}

	/* We will write into the header below, so ensure it is writable. */
	if (M_UNWRITABLE(m, sizeof(struct ip6_hdr)) &&
	    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL)
		return;
	oip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If the destination address of the erroneous packet is a multicast
	 * address, or the packet was sent using link-layer multicast,
	 * we should basically suppress sending an error (RFC 2463, Section
	 * 2.4).
	 * We have two exceptions (the item e.2 in that section):
	 * - the Packet Too Big message can be sent for path MTU discovery.
	 * - the Parameter Problem Message that can be allowed an icmp6 error
	 *   in the option type field. This check has been done in
	 *   ip6_unknown_opt(), so we can just check the type and code.
	 */
	if ((m->m_flags & (M_BCAST|M_MCAST) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
	    (type != ICMP6_PACKET_TOO_BIG &&
	    (type != ICMP6_PARAM_PROB ||
	    code != ICMP6_PARAMPROB_OPTION)))
		goto freeit;

	/*
	 * RFC 2463, 2.4 (e.5): source address check.
	 * XXX: the case of anycast source?
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
		goto freeit;

	/*
	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
	 * don't do it.
	 */
	nxt = -1;
	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
		struct icmp6_hdr *icp;

		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
		    sizeof(*icp));
		if (icp == NULL) {
			ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
			return;
		}
		/*
		 * Types below ICMP6_ECHO_REQUEST (128) are errors;
		 * ND_REDIRECT is informational but must also never be
		 * answered with an error.
		 */
		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
		    icp->icmp6_type == ND_REDIRECT) {
			/*
			 * ICMPv6 error
			 * Special case: for redirect (which is
			 * informational) we must not send icmp6 error.
			 */
			ICMP6_STATINC(ICMP6_STAT_CANTERROR);
			goto freeit;
		} else {
			/* ICMPv6 informational - send the error */
		}
	} else {
		/* non-ICMPv6 - send the error */
	}

	/* m may have been replaced by m_pullup inside IP6_EXTHDR_GET. */
	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */

	/* Finally, do rate limitation check. */
	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
		ICMP6_STATINC(ICMP6_STAT_TOOFREQ);
		goto freeit;
	}

	/*
	 * OK, ICMP6 can be generated.
	 */

	/* Truncate the quoted packet so the error fits the minimum MTU. */
	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);

	/* Prepend room for the new IPv6 + ICMPv6 headers. */
	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
	M_PREPEND(m, preplen, M_DONTWAIT);
	if (m && M_UNWRITABLE(m, preplen))
		m = m_pullup(m, preplen);
	if (m == NULL) {
		nd6log(LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__);
		return;
	}

	/*
	 * Seed the new header with the offender's addresses;
	 * icmp6_reflect() will swap/select the real ones.
	 */
	nip6 = mtod(m, struct ip6_hdr *);
	nip6->ip6_src = oip6->ip6_src;
	nip6->ip6_dst = oip6->ip6_dst;

	/* The quoted copy must not carry embedded zone IDs on the wire. */
	in6_clearscope(&oip6->ip6_src);
	in6_clearscope(&oip6->ip6_dst);

	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
	icmp6->icmp6_type = type;
	icmp6->icmp6_code = code;
	icmp6->icmp6_pptr = htonl((u_int32_t)param);

	/*
	 * icmp6_reflect() is designed to be in the input path.
	 * icmp6_error() can be called from both input and output path,
	 * and if we are in output path rcvif could contain bogus value.
	 * clear m->m_pkthdr.rcvif for safety, we should have enough scope
	 * information in ip header (nip6).
	 */
	m_reset_rcvif(m);

	ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);

	/* header order: IPv6 - ICMPv6 */
	icmp6_reflect(m, sizeof(struct ip6_hdr));

	return;

freeit:
	/*
	 * If we can't tell whether or not we can generate ICMP6, free it.
	 */
	m_freem(m);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Process a received ICMP6 message.
|
|
|
|
*/
|
2017-02-02 05:52:10 +03:00
|
|
|
static void
|
|
|
|
_icmp6_input(struct mbuf *m, int off, int proto)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2017-02-02 05:52:10 +03:00
|
|
|
struct mbuf *n;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct ip6_hdr *ip6, *nip6;
|
|
|
|
struct icmp6_hdr *icmp6, *nicmp6;
|
2017-01-13 13:38:37 +03:00
|
|
|
int icmp6len = m->m_pkthdr.len - off;
|
2018-01-23 10:02:57 +03:00
|
|
|
int code, sum;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *rcvif;
|
|
|
|
struct psref psref;
|
2017-01-16 10:33:36 +03:00
|
|
|
char ip6buf[INET6_ADDRSTRLEN], ip6buf2[INET6_ADDRSTRLEN];
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif = m_get_rcvif_psref(m, &psref);
|
2016-06-28 05:02:56 +03:00
|
|
|
if (__predict_false(rcvif == NULL))
|
|
|
|
goto freeit;
|
|
|
|
|
2006-04-15 04:24:12 +04:00
|
|
|
#define ICMP6_MAXLEN (sizeof(*nip6) + sizeof(*nicmp6) + 4)
|
|
|
|
KASSERT(ICMP6_MAXLEN < MCLBYTES);
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_msg);
|
2001-12-07 13:10:43 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Locate icmp6 structure in mbuf, and check
|
|
|
|
* that not corrupted and of at least minimum length
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (icmp6len < sizeof(struct icmp6_hdr)) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_error);
|
1999-06-28 10:36:47 +04:00
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
|
2017-07-07 03:55:15 +03:00
|
|
|
if (m->m_len < sizeof(struct ip6_hdr)) {
|
|
|
|
m = m_pullup(m, sizeof(struct ip6_hdr));
|
|
|
|
if (m == NULL) {
|
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_error);
|
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-05-07 15:28:37 +04:00
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
1999-12-13 18:17:17 +03:00
|
|
|
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
|
|
|
|
if (icmp6 == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2016-08-19 15:26:01 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_error);
|
|
|
|
goto freeit;
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
2018-01-23 10:02:57 +03:00
|
|
|
|
2016-11-15 23:50:28 +03:00
|
|
|
/*
|
|
|
|
* Enforce alignment requirements that are violated in
|
|
|
|
* some cases, see kern/50766 for details.
|
|
|
|
*/
|
2021-04-04 01:28:12 +03:00
|
|
|
if (ACCESSIBLE_POINTER(icmp6, struct ip6_hdr) == 0) {
|
2016-11-15 23:50:28 +03:00
|
|
|
m = m_copyup(m, off + sizeof(struct icmp6_hdr), 0);
|
|
|
|
if (m == NULL) {
|
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_error);
|
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
2018-01-23 10:02:57 +03:00
|
|
|
icmp6 = (struct icmp6_hdr *)(mtod(m, char *) + off);
|
2016-11-15 23:50:28 +03:00
|
|
|
}
|
2021-04-04 01:28:12 +03:00
|
|
|
KASSERT(ACCESSIBLE_POINTER(icmp6, struct ip6_hdr));
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-08-19 15:26:01 +03:00
|
|
|
/*
|
|
|
|
* calculate the checksum
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n",
|
2017-01-16 18:44:46 +03:00
|
|
|
icmp6->icmp6_type, sum, IN6_PRINT(ip6buf, &ip6->ip6_src));
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_CHECKSUM);
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_error);
|
1999-06-28 10:36:47 +04:00
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined(NFAITH) && 0 < NFAITH
|
2001-05-08 14:15:13 +04:00
|
|
|
if (faithprefix(&ip6->ip6_dst)) {
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Deliver very specific ICMP6 type only.
|
2006-03-03 17:07:06 +03:00
|
|
|
* This is important to deliver TOOBIG. Otherwise PMTUD
|
1999-06-28 10:36:47 +04:00
|
|
|
* will not work.
|
|
|
|
*/
|
|
|
|
switch (icmp6->icmp6_type) {
|
|
|
|
case ICMP6_DST_UNREACH:
|
|
|
|
case ICMP6_PACKET_TOO_BIG:
|
|
|
|
case ICMP6_TIME_EXCEEDED:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2016-08-19 15:26:01 +03:00
|
|
|
code = icmp6->icmp6_code;
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_INHIST + icmp6->icmp6_type);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
switch (icmp6->icmp6_type) {
|
|
|
|
case ICMP6_DST_UNREACH:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_dstunreach);
|
1999-06-28 10:36:47 +04:00
|
|
|
switch (code) {
|
|
|
|
case ICMP6_DST_UNREACH_NOROUTE:
|
|
|
|
code = PRC_UNREACH_NET;
|
|
|
|
break;
|
|
|
|
case ICMP6_DST_UNREACH_ADMIN:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_adminprohib);
|
2000-02-26 11:39:18 +03:00
|
|
|
code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
|
|
|
|
break;
|
1999-06-28 10:36:47 +04:00
|
|
|
case ICMP6_DST_UNREACH_ADDR:
|
2000-02-26 11:39:18 +03:00
|
|
|
code = PRC_HOSTDEAD;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
2000-02-26 11:39:18 +03:00
|
|
|
case ICMP6_DST_UNREACH_BEYONDSCOPE:
|
|
|
|
/* I mean "source address was incorrect." */
|
2001-10-15 15:12:44 +04:00
|
|
|
code = PRC_UNREACH_NET;
|
2000-02-26 11:39:18 +03:00
|
|
|
break;
|
1999-06-28 10:36:47 +04:00
|
|
|
case ICMP6_DST_UNREACH_NOPORT:
|
|
|
|
code = PRC_UNREACH_PORT;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto badcode;
|
|
|
|
}
|
|
|
|
goto deliver;
|
|
|
|
|
|
|
|
case ICMP6_PACKET_TOO_BIG:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_pkttoobig);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2008-10-03 12:23:06 +04:00
|
|
|
/*
|
|
|
|
* MTU is checked in icmp6_mtudisc.
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
code = PRC_MSGSIZE;
|
|
|
|
|
2000-02-26 11:39:18 +03:00
|
|
|
/*
|
|
|
|
* Updating the path MTU will be done after examining
|
|
|
|
* intermediate extension headers.
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
goto deliver;
|
|
|
|
|
|
|
|
case ICMP6_TIME_EXCEEDED:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_timeexceed);
|
1999-06-28 10:36:47 +04:00
|
|
|
switch (code) {
|
|
|
|
case ICMP6_TIME_EXCEED_TRANSIT:
|
2001-10-15 15:12:44 +04:00
|
|
|
code = PRC_TIMXCEED_INTRANS;
|
|
|
|
break;
|
1999-06-28 10:36:47 +04:00
|
|
|
case ICMP6_TIME_EXCEED_REASSEMBLY:
|
2001-10-15 15:12:44 +04:00
|
|
|
code = PRC_TIMXCEED_REASS;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto badcode;
|
|
|
|
}
|
|
|
|
goto deliver;
|
|
|
|
|
|
|
|
case ICMP6_PARAM_PROB:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_paramprob);
|
1999-06-28 10:36:47 +04:00
|
|
|
switch (code) {
|
|
|
|
case ICMP6_PARAMPROB_NEXTHEADER:
|
|
|
|
code = PRC_UNREACH_PROTOCOL;
|
|
|
|
break;
|
|
|
|
case ICMP6_PARAMPROB_HEADER:
|
|
|
|
case ICMP6_PARAMPROB_OPTION:
|
|
|
|
code = PRC_PARAMPROB;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto badcode;
|
|
|
|
}
|
|
|
|
goto deliver;
|
|
|
|
|
|
|
|
case ICMP6_ECHO_REQUEST:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_echo);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
2001-10-15 15:12:44 +04:00
|
|
|
/*
|
|
|
|
* Copy mbuf to send to two data paths: userland socket(s),
|
|
|
|
* and to the querier (echo reply).
|
|
|
|
* m: a copy for socket, n: a copy for querier
|
2006-09-01 06:44:46 +04:00
|
|
|
*
|
|
|
|
* If the first mbuf is shared, or the first mbuf is too short,
|
|
|
|
* copy the first part of the data into a fresh mbuf.
|
|
|
|
* Otherwise, we will wrongly overwrite both copies.
|
2001-10-15 15:12:44 +04:00
|
|
|
*/
|
2018-04-29 10:05:13 +03:00
|
|
|
if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
|
2001-10-15 15:12:44 +04:00
|
|
|
/* Give up local */
|
|
|
|
n = m;
|
|
|
|
m = NULL;
|
2018-04-26 10:28:21 +03:00
|
|
|
} else if (M_UNWRITABLE(n, off + sizeof(struct icmp6_hdr))) {
|
1999-06-28 10:36:47 +04:00
|
|
|
struct mbuf *n0 = n;
|
|
|
|
|
|
|
|
/*
|
2001-10-15 15:12:44 +04:00
|
|
|
* Prepare an internal mbuf. m_pullup() doesn't
|
1999-06-28 10:36:47 +04:00
|
|
|
* always copy the length we specified.
|
|
|
|
*/
|
2006-09-05 20:11:26 +04:00
|
|
|
if ((n = m_dup(n0, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
|
2001-10-15 15:12:44 +04:00
|
|
|
/* Give up local */
|
|
|
|
n = m;
|
|
|
|
m = NULL;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2006-09-01 06:44:46 +04:00
|
|
|
m_freem(n0);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2007-01-16 00:49:56 +03:00
|
|
|
IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
|
|
|
|
sizeof(*nicmp6));
|
2014-02-20 17:36:06 +04:00
|
|
|
if (nicmp6 == NULL)
|
|
|
|
goto freeit;
|
1999-06-28 10:36:47 +04:00
|
|
|
nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
|
|
|
|
nicmp6->icmp6_code = 0;
|
1999-07-31 22:41:15 +04:00
|
|
|
if (n) {
|
2008-04-15 07:57:04 +04:00
|
|
|
uint64_t *icmp6s = ICMP6_STAT_GETREF();
|
|
|
|
icmp6s[ICMP6_STAT_REFLECT]++;
|
|
|
|
icmp6s[ICMP6_STAT_OUTHIST + ICMP6_ECHO_REPLY]++;
|
|
|
|
ICMP6_STAT_PUTREF();
|
2006-09-01 06:44:46 +04:00
|
|
|
icmp6_reflect(n, off);
|
1999-07-31 22:41:15 +04:00
|
|
|
}
|
2001-10-15 15:12:44 +04:00
|
|
|
if (!m)
|
|
|
|
goto freeit;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
|
|
|
|
|
|
|
case ICMP6_ECHO_REPLY:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_echoreply);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
|
|
|
break;
|
|
|
|
|
2003-06-06 12:13:43 +04:00
|
|
|
case MLD_LISTENER_QUERY:
|
|
|
|
case MLD_LISTENER_REPORT:
|
|
|
|
if (icmp6len < sizeof(struct mld_hdr))
|
1999-06-28 10:36:47 +04:00
|
|
|
goto badlen;
|
2003-06-06 12:13:43 +04:00
|
|
|
if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_mldquery);
|
1999-12-13 18:17:17 +03:00
|
|
|
else
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_mldreport);
|
2018-04-29 10:05:13 +03:00
|
|
|
if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
|
2000-02-26 11:39:18 +03:00
|
|
|
/* give up local */
|
2006-03-06 02:47:08 +03:00
|
|
|
mld_input(m, off);
|
2000-02-26 11:39:18 +03:00
|
|
|
m = NULL;
|
|
|
|
goto freeit;
|
|
|
|
}
|
2006-03-06 02:47:08 +03:00
|
|
|
mld_input(n, off);
|
1999-06-28 10:36:47 +04:00
|
|
|
/* m stays. */
|
|
|
|
break;
|
|
|
|
|
2003-06-06 12:13:43 +04:00
|
|
|
case MLD_LISTENER_DONE:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_mlddone);
|
2003-06-06 12:13:43 +04:00
|
|
|
if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */
|
1999-06-28 10:36:47 +04:00
|
|
|
goto badlen;
|
|
|
|
break; /* nothing to be done in kernel */
|
|
|
|
|
2003-06-06 12:13:43 +04:00
|
|
|
case MLD_MTRACE_RESP:
|
|
|
|
case MLD_MTRACE:
|
2001-12-21 11:54:52 +03:00
|
|
|
/* XXX: these two are experimental. not officially defined. */
|
1999-12-13 18:17:17 +03:00
|
|
|
/* XXX: per-interface statistics? */
|
2000-02-26 11:39:18 +03:00
|
|
|
break; /* just pass it to applications */
|
1999-12-13 18:17:17 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */
|
|
|
|
{
|
|
|
|
enum { WRU, FQDN } mode;
|
|
|
|
|
2000-01-02 19:31:17 +03:00
|
|
|
if (!icmp6_nodeinfo)
|
|
|
|
break;
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
if (icmp6len == sizeof(struct icmp6_hdr) + 4)
|
|
|
|
mode = WRU;
|
2000-06-12 20:21:02 +04:00
|
|
|
else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
|
1999-06-28 10:36:47 +04:00
|
|
|
mode = FQDN;
|
|
|
|
else
|
|
|
|
goto badlen;
|
|
|
|
|
|
|
|
if (mode == FQDN) {
|
2018-04-29 10:05:13 +03:00
|
|
|
n = m_copypacket(m, M_DONTWAIT);
|
2000-01-02 19:31:17 +03:00
|
|
|
if (n)
|
|
|
|
n = ni6_input(n, off);
|
|
|
|
} else {
|
1999-06-28 10:36:47 +04:00
|
|
|
u_char *p;
|
2006-04-15 04:24:12 +04:00
|
|
|
int maxhlen;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2002-06-09 00:06:44 +04:00
|
|
|
if ((icmp6_nodeinfo & 5) != 5)
|
2001-10-15 15:12:44 +04:00
|
|
|
break;
|
|
|
|
|
2000-06-12 20:21:02 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
1999-06-28 10:36:47 +04:00
|
|
|
MGETHDR(n, M_DONTWAIT, m->m_type);
|
2006-04-15 04:24:12 +04:00
|
|
|
if (n && ICMP6_MAXLEN > MHLEN) {
|
2000-02-26 11:39:18 +03:00
|
|
|
MCLGET(n, M_DONTWAIT);
|
|
|
|
if ((n->m_flags & M_EXT) == 0) {
|
|
|
|
m_free(n);
|
|
|
|
n = NULL;
|
|
|
|
}
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
if (n == NULL) {
|
|
|
|
/* Give up remote */
|
|
|
|
break;
|
|
|
|
}
|
2016-06-10 16:27:10 +03:00
|
|
|
m_reset_rcvif(n);
|
2000-02-26 11:39:18 +03:00
|
|
|
n->m_len = 0;
|
2006-04-15 04:24:12 +04:00
|
|
|
maxhlen = M_TRAILINGSPACE(n) - ICMP6_MAXLEN;
|
2018-01-23 10:02:57 +03:00
|
|
|
if (maxhlen < 0) {
|
|
|
|
m_free(n);
|
2014-11-25 22:51:17 +03:00
|
|
|
break;
|
2018-01-23 10:02:57 +03:00
|
|
|
}
|
2000-02-26 11:39:18 +03:00
|
|
|
if (maxhlen > hostnamelen)
|
|
|
|
maxhlen = hostnamelen;
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* Copy IPv6 and ICMPv6 only.
|
|
|
|
*/
|
|
|
|
nip6 = mtod(n, struct ip6_hdr *);
|
2018-01-23 10:02:57 +03:00
|
|
|
memcpy(nip6, ip6, sizeof(struct ip6_hdr));
|
1999-06-28 10:36:47 +04:00
|
|
|
nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
|
2018-01-23 10:02:57 +03:00
|
|
|
memcpy(nicmp6, icmp6, sizeof(struct icmp6_hdr));
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
p = (u_char *)(nicmp6 + 1);
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(p, 0, 4);
|
2018-01-23 10:02:57 +03:00
|
|
|
memcpy(p + 4, hostname, maxhlen); /* meaningless TTL */
|
|
|
|
|
2018-12-22 17:07:53 +03:00
|
|
|
m_copy_pkthdr(n, m);
|
1999-06-28 10:36:47 +04:00
|
|
|
n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
|
2000-02-26 11:39:18 +03:00
|
|
|
sizeof(struct icmp6_hdr) + 4 + maxhlen;
|
1999-06-28 10:36:47 +04:00
|
|
|
nicmp6->icmp6_type = ICMP6_WRUREPLY;
|
|
|
|
nicmp6->icmp6_code = 0;
|
|
|
|
}
|
|
|
|
if (n) {
|
2008-04-15 07:57:04 +04:00
|
|
|
uint64_t *icmp6s = ICMP6_STAT_GETREF();
|
|
|
|
icmp6s[ICMP6_STAT_REFLECT]++;
|
|
|
|
icmp6s[ICMP6_STAT_OUTHIST + ICMP6_WRUREPLY]++;
|
|
|
|
ICMP6_STAT_PUTREF();
|
2018-01-23 10:02:57 +03:00
|
|
|
icmp6_reflect(n, sizeof(struct ip6_hdr));
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case ICMP6_WRUREPLY:
|
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ND_ROUTER_SOLICIT:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_routersolicit);
|
2020-06-12 14:04:44 +03:00
|
|
|
/* FALLTHROUGH */
|
1999-06-28 10:36:47 +04:00
|
|
|
case ND_ROUTER_ADVERT:
|
2020-06-12 14:04:44 +03:00
|
|
|
if (icmp6->icmp6_type == ND_ROUTER_ADVERT)
|
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_routeradvert);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
2020-06-12 14:04:44 +03:00
|
|
|
if ((icmp6->icmp6_type == ND_ROUTER_SOLICIT &&
|
|
|
|
icmp6len < sizeof(struct nd_router_solicit)) ||
|
|
|
|
(icmp6->icmp6_type == ND_ROUTER_ADVERT &&
|
|
|
|
icmp6len < sizeof(struct nd_router_advert)))
|
1999-06-28 10:36:47 +04:00
|
|
|
goto badlen;
|
2018-04-29 10:05:13 +03:00
|
|
|
if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
|
2000-02-26 11:39:18 +03:00
|
|
|
/* give up local */
|
2020-06-12 14:04:44 +03:00
|
|
|
nd6_rtr_cache(m, off, icmp6len, icmp6->icmp6_type);
|
2000-02-26 11:39:18 +03:00
|
|
|
m = NULL;
|
|
|
|
goto freeit;
|
|
|
|
}
|
2020-06-12 14:04:44 +03:00
|
|
|
nd6_rtr_cache(n, off, icmp6len, icmp6->icmp6_type);
|
1999-06-28 10:36:47 +04:00
|
|
|
/* m stays. */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ND_NEIGHBOR_SOLICIT:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_neighborsolicit);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
|
|
|
if (icmp6len < sizeof(struct nd_neighbor_solicit))
|
|
|
|
goto badlen;
|
2018-04-29 10:05:13 +03:00
|
|
|
if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
|
2000-02-26 11:39:18 +03:00
|
|
|
/* give up local */
|
|
|
|
nd6_ns_input(m, off, icmp6len);
|
|
|
|
m = NULL;
|
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
nd6_ns_input(n, off, icmp6len);
|
1999-06-28 10:36:47 +04:00
|
|
|
/* m stays. */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ND_NEIGHBOR_ADVERT:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_neighboradvert);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
|
|
|
if (icmp6len < sizeof(struct nd_neighbor_advert))
|
|
|
|
goto badlen;
|
2018-04-29 10:05:13 +03:00
|
|
|
if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
|
2000-02-26 11:39:18 +03:00
|
|
|
/* give up local */
|
|
|
|
nd6_na_input(m, off, icmp6len);
|
|
|
|
m = NULL;
|
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
nd6_na_input(n, off, icmp6len);
|
1999-06-28 10:36:47 +04:00
|
|
|
/* m stays. */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ND_REDIRECT:
|
2016-05-17 06:27:02 +03:00
|
|
|
icmp6_ifstat_inc(rcvif, ifs6_in_redirect);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (code != 0)
|
|
|
|
goto badcode;
|
|
|
|
if (icmp6len < sizeof(struct nd_redirect))
|
|
|
|
goto badlen;
|
2018-04-29 10:05:13 +03:00
|
|
|
if ((n = m_copypacket(m, M_DONTWAIT)) == NULL) {
|
2000-02-26 11:39:18 +03:00
|
|
|
/* give up local */
|
|
|
|
icmp6_redirect_input(m, off);
|
|
|
|
m = NULL;
|
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
icmp6_redirect_input(n, off);
|
1999-06-28 10:36:47 +04:00
|
|
|
/* m stays. */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ICMP6_ROUTER_RENUMBERING:
|
|
|
|
if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
|
|
|
|
code != ICMP6_ROUTER_RENUMBERING_RESULT)
|
|
|
|
goto badcode;
|
|
|
|
if (icmp6len < sizeof(struct icmp6_router_renum))
|
|
|
|
goto badlen;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2017-01-16 10:33:36 +03:00
|
|
|
nd6log(LOG_DEBUG,
|
|
|
|
"unknown type %d(src=%s, dst=%s, ifid=%d)\n",
|
|
|
|
icmp6->icmp6_type,
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6buf, &ip6->ip6_src),
|
|
|
|
IN6_PRINT(ip6buf2, &ip6->ip6_dst),
|
2016-05-17 06:27:02 +03:00
|
|
|
rcvif ? rcvif->if_index : 0);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
|
|
|
|
/* ICMPv6 error: MUST deliver it by spec... */
|
|
|
|
code = PRC_NCMDS;
|
|
|
|
/* deliver */
|
|
|
|
} else {
|
|
|
|
/* ICMPv6 informational: MUST not deliver */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
deliver:
|
2001-02-11 09:49:49 +03:00
|
|
|
if (icmp6_notify_error(m, off, icmp6len, code)) {
|
|
|
|
/* In this case, m should've been freed. */
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif_psref(rcvif, &psref);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2001-02-11 09:49:49 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
badcode:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_BADCODE);
|
2001-02-11 09:49:49 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
badlen:
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_BADLEN);
|
2001-02-11 09:49:49 +03:00
|
|
|
break;
|
|
|
|
}
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif_psref(rcvif, &psref);
|
2001-02-11 09:49:49 +03:00
|
|
|
|
|
|
|
/* deliver the packet to appropriate sockets */
|
2017-01-13 13:38:37 +03:00
|
|
|
icmp6_rip6_input(&m, off);
|
2001-02-11 09:49:49 +03:00
|
|
|
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
2001-02-11 09:49:49 +03:00
|
|
|
|
2018-01-23 13:55:38 +03:00
|
|
|
freeit:
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif_psref(rcvif, &psref);
|
2001-02-11 09:49:49 +03:00
|
|
|
m_freem(m);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
icmp6_input(struct mbuf **mp, int *offp, int proto)
|
|
|
|
{
|
|
|
|
|
|
|
|
wqinput_input(icmp6_wqinput, *mp, *offp, proto);
|
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
return IPPROTO_DONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2007-05-23 21:14:59 +04:00
|
|
|
icmp6_notify_error(struct mbuf *m, int off, int icmp6len, int code)
|
2001-02-11 09:49:49 +03:00
|
|
|
{
|
|
|
|
struct icmp6_hdr *icmp6;
|
|
|
|
struct ip6_hdr *eip6;
|
|
|
|
u_int32_t notifymtu;
|
|
|
|
struct sockaddr_in6 icmp6src, icmp6dst;
|
|
|
|
|
|
|
|
if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2001-02-11 09:49:49 +03:00
|
|
|
goto freeit;
|
|
|
|
}
|
|
|
|
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
|
2018-01-23 13:55:38 +03:00
|
|
|
sizeof(*icmp6) + sizeof(struct ip6_hdr));
|
2001-02-11 09:49:49 +03:00
|
|
|
if (icmp6 == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2002-09-11 06:46:42 +04:00
|
|
|
return (-1);
|
2001-02-11 09:49:49 +03:00
|
|
|
}
|
|
|
|
eip6 = (struct ip6_hdr *)(icmp6 + 1);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
/* Detect the upper level protocol */
|
|
|
|
{
|
2019-10-06 05:30:58 +03:00
|
|
|
void *(*ctlfunc)(int, const struct sockaddr *, void *);
|
1999-06-28 10:36:47 +04:00
|
|
|
u_int8_t nxt = eip6->ip6_nxt;
|
|
|
|
int eoff = off + sizeof(struct icmp6_hdr) +
|
|
|
|
sizeof(struct ip6_hdr);
|
1999-12-13 18:17:17 +03:00
|
|
|
struct ip6ctlparam ip6cp;
|
2000-02-26 11:39:18 +03:00
|
|
|
struct in6_addr *finaldst = NULL;
|
2001-02-11 09:49:49 +03:00
|
|
|
int icmp6type = icmp6->icmp6_type;
|
2000-02-26 11:39:18 +03:00
|
|
|
struct ip6_frag *fh;
|
|
|
|
struct ip6_rthdr *rth;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *rcvif;
|
|
|
|
int s;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2001-10-15 15:12:44 +04:00
|
|
|
while (1) { /* XXX: should avoid infinite loop explicitly? */
|
1999-06-28 10:36:47 +04:00
|
|
|
struct ip6_ext *eh;
|
|
|
|
|
2001-02-10 07:14:26 +03:00
|
|
|
switch (nxt) {
|
1999-06-28 10:36:47 +04:00
|
|
|
case IPPROTO_HOPOPTS:
|
|
|
|
case IPPROTO_DSTOPTS:
|
|
|
|
case IPPROTO_AH:
|
1999-12-13 18:17:17 +03:00
|
|
|
IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
|
2018-04-14 17:59:58 +03:00
|
|
|
eoff, sizeof(*eh));
|
1999-12-13 18:17:17 +03:00
|
|
|
if (eh == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2002-09-11 06:46:42 +04:00
|
|
|
return (-1);
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
2002-06-09 18:43:10 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
if (nxt == IPPROTO_AH)
|
|
|
|
eoff += (eh->ip6e_len + 2) << 2;
|
|
|
|
else
|
|
|
|
eoff += (eh->ip6e_len + 1) << 3;
|
|
|
|
nxt = eh->ip6e_nxt;
|
|
|
|
break;
|
2000-02-26 11:39:18 +03:00
|
|
|
case IPPROTO_ROUTING:
|
2018-04-14 11:03:33 +03:00
|
|
|
/* Ignore the option. */
|
2000-02-26 11:39:18 +03:00
|
|
|
IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
|
2018-04-14 17:59:58 +03:00
|
|
|
eoff, sizeof(*rth));
|
2000-02-26 11:39:18 +03:00
|
|
|
if (rth == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2002-09-11 06:46:42 +04:00
|
|
|
return (-1);
|
2000-02-26 11:39:18 +03:00
|
|
|
}
|
2018-04-14 11:03:33 +03:00
|
|
|
|
|
|
|
eoff += (rth->ip6r_len + 1) << 3;
|
2000-02-26 11:39:18 +03:00
|
|
|
nxt = rth->ip6r_nxt;
|
|
|
|
break;
|
|
|
|
case IPPROTO_FRAGMENT:
|
|
|
|
IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
|
2018-04-14 17:59:58 +03:00
|
|
|
eoff, sizeof(*fh));
|
2000-02-26 11:39:18 +03:00
|
|
|
if (fh == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2002-09-11 06:46:42 +04:00
|
|
|
return (-1);
|
2000-02-26 11:39:18 +03:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Data after a fragment header is meaningless
|
|
|
|
* unless it is the first fragment, but
|
|
|
|
* we'll go to the notify label for path MTU
|
|
|
|
* discovery.
|
|
|
|
*/
|
|
|
|
if (fh->ip6f_offlg & IP6F_OFF_MASK)
|
|
|
|
goto notify;
|
|
|
|
|
|
|
|
eoff += sizeof(struct ip6_frag);
|
|
|
|
nxt = fh->ip6f_nxt;
|
|
|
|
break;
|
1999-06-28 10:36:47 +04:00
|
|
|
default:
|
2000-02-26 11:39:18 +03:00
|
|
|
/*
|
|
|
|
* This case includes ESP and the No Next
|
2001-10-15 15:12:44 +04:00
|
|
|
* Header. In such cases going to the notify
|
2000-02-26 11:39:18 +03:00
|
|
|
* label does not have any meaning
|
|
|
|
* (i.e. ctlfunc will be NULL), but we go
|
|
|
|
* anyway since we might have to update
|
|
|
|
* path MTU information.
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
goto notify;
|
|
|
|
}
|
|
|
|
}
|
2001-02-11 09:49:49 +03:00
|
|
|
notify:
|
1999-12-13 18:17:17 +03:00
|
|
|
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
|
2018-04-14 17:59:58 +03:00
|
|
|
sizeof(*icmp6) + sizeof(struct ip6_hdr));
|
1999-12-13 18:17:17 +03:00
|
|
|
if (icmp6 == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2002-09-11 06:46:42 +04:00
|
|
|
return (-1);
|
2001-02-11 09:49:49 +03:00
|
|
|
}
|
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/*
|
|
|
|
* retrieve parameters from the inner IPv6 header, and convert
|
|
|
|
* them into sockaddr structures.
|
|
|
|
* XXX: there is no guarantee that the source or destination
|
|
|
|
* addresses of the inner packet are in the same scope zone as
|
|
|
|
* the addresses of the icmp packet. But there is no other
|
|
|
|
* way to determine the zone.
|
|
|
|
*/
|
2001-02-11 09:49:49 +03:00
|
|
|
eip6 = (struct ip6_hdr *)(icmp6 + 1);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif = m_get_rcvif(m, &s);
|
2017-02-07 05:38:08 +03:00
|
|
|
if (__predict_false(rcvif == NULL))
|
|
|
|
goto freeit;
|
2007-10-24 10:37:20 +04:00
|
|
|
sockaddr_in6_init(&icmp6dst,
|
|
|
|
(finaldst == NULL) ? &eip6->ip6_dst : finaldst, 0, 0, 0);
|
2016-06-10 16:31:43 +03:00
|
|
|
if (in6_setscope(&icmp6dst.sin6_addr, rcvif, NULL)) {
|
|
|
|
m_put_rcvif(rcvif, &s);
|
2001-02-11 09:49:49 +03:00
|
|
|
goto freeit;
|
2016-06-10 16:31:43 +03:00
|
|
|
}
|
2007-10-24 10:37:20 +04:00
|
|
|
sockaddr_in6_init(&icmp6src, &eip6->ip6_src, 0, 0, 0);
|
2016-06-10 16:31:43 +03:00
|
|
|
if (in6_setscope(&icmp6src.sin6_addr, rcvif, NULL)) {
|
|
|
|
m_put_rcvif(rcvif, &s);
|
2001-02-11 09:49:49 +03:00
|
|
|
goto freeit;
|
2016-06-10 16:31:43 +03:00
|
|
|
}
|
|
|
|
m_put_rcvif(rcvif, &s);
|
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
icmp6src.sin6_flowinfo =
|
|
|
|
(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
|
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
if (finaldst == NULL)
|
2001-02-11 09:49:49 +03:00
|
|
|
finaldst = &eip6->ip6_dst;
|
2000-10-19 01:14:12 +04:00
|
|
|
ip6cp.ip6c_m = m;
|
|
|
|
ip6cp.ip6c_icmp6 = icmp6;
|
|
|
|
ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
|
|
|
|
ip6cp.ip6c_off = eoff;
|
|
|
|
ip6cp.ip6c_finaldst = finaldst;
|
2001-02-11 09:49:49 +03:00
|
|
|
ip6cp.ip6c_src = &icmp6src;
|
|
|
|
ip6cp.ip6c_nxt = nxt;
|
|
|
|
|
|
|
|
if (icmp6type == ICMP6_PACKET_TOO_BIG) {
|
|
|
|
notifymtu = ntohl(icmp6->icmp6_mtu);
|
|
|
|
ip6cp.ip6c_cmdarg = (void *)¬ifymtu;
|
|
|
|
}
|
2000-02-26 11:39:18 +03:00
|
|
|
|
2019-10-06 05:30:58 +03:00
|
|
|
ctlfunc = inet6sw[ip6_protox[nxt]].pr_ctlinput;
|
1999-12-13 18:17:17 +03:00
|
|
|
if (ctlfunc) {
|
2018-01-23 13:55:38 +03:00
|
|
|
(void)(*ctlfunc)(code, sin6tosa(&icmp6dst), &ip6cp);
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2002-09-11 06:46:42 +04:00
|
|
|
return (0);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2018-01-23 13:55:38 +03:00
|
|
|
freeit:
|
1999-06-28 10:36:47 +04:00
|
|
|
m_freem(m);
|
2002-09-11 06:46:42 +04:00
|
|
|
return (-1);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
void
|
2007-05-23 21:14:59 +04:00
|
|
|
icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
|
2000-02-26 11:39:18 +03:00
|
|
|
{
|
2000-12-09 04:29:45 +03:00
|
|
|
unsigned long rtcount;
|
2000-10-19 01:14:12 +04:00
|
|
|
struct icmp6_mtudisc_callback *mc;
|
|
|
|
struct in6_addr *dst = ip6cp->ip6c_finaldst;
|
|
|
|
struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
|
|
|
|
struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */
|
2000-02-26 11:39:18 +03:00
|
|
|
u_int mtu = ntohl(icmp6->icmp6_mtu);
|
|
|
|
struct rtentry *rt = NULL;
|
|
|
|
struct sockaddr_in6 sin6;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *rcvif;
|
|
|
|
int s;
|
2000-02-26 11:39:18 +03:00
|
|
|
|
2008-10-03 12:23:06 +04:00
|
|
|
/*
|
|
|
|
* The MTU should not be less than the minimal IPv6 MTU except for the
|
|
|
|
* hack in ip6_output/ip6_setpmtu where we always include a frag header.
|
2018-01-23 13:55:38 +03:00
|
|
|
* In that one case, the MTU might be less than 1280.
|
2008-10-03 12:23:06 +04:00
|
|
|
*/
|
|
|
|
if (__predict_false(mtu < IPV6_MMTU - sizeof(struct ip6_frag))) {
|
|
|
|
/* is the mtu even sane? */
|
|
|
|
if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
|
|
|
|
return;
|
|
|
|
if (!validated)
|
|
|
|
return;
|
|
|
|
mtu = IPV6_MMTU - sizeof(struct ip6_frag);
|
|
|
|
}
|
|
|
|
|
2000-12-09 04:29:45 +03:00
|
|
|
/*
|
|
|
|
* allow non-validated cases if memory is plenty, to make traffic
|
|
|
|
* from non-connected pcb happy.
|
|
|
|
*/
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_enter(&icmp6_mtx);
|
2000-12-09 04:29:45 +03:00
|
|
|
rtcount = rt_timer_count(icmp6_mtudisc_timeout_q);
|
|
|
|
if (validated) {
|
2017-02-13 10:18:20 +03:00
|
|
|
if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat) {
|
|
|
|
mutex_exit(&icmp6_mtx);
|
2000-12-09 04:29:45 +03:00
|
|
|
return;
|
2018-01-23 10:02:57 +03:00
|
|
|
} else if (0 <= icmp6_mtudisc_lowat &&
|
2001-02-08 19:07:39 +03:00
|
|
|
rtcount > icmp6_mtudisc_lowat) {
|
2000-12-09 04:29:45 +03:00
|
|
|
/*
|
|
|
|
* XXX nuke a victim, install the new one.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
} else {
|
2017-02-13 10:18:20 +03:00
|
|
|
if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat) {
|
|
|
|
mutex_exit(&icmp6_mtx);
|
2000-12-09 04:29:45 +03:00
|
|
|
return;
|
2017-02-13 10:18:20 +03:00
|
|
|
}
|
2000-12-09 04:29:45 +03:00
|
|
|
}
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_exit(&icmp6_mtx);
|
2000-12-09 04:29:45 +03:00
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&sin6, 0, sizeof(sin6));
|
2000-02-26 11:39:18 +03:00
|
|
|
sin6.sin6_family = PF_INET6;
|
|
|
|
sin6.sin6_len = sizeof(struct sockaddr_in6);
|
|
|
|
sin6.sin6_addr = *dst;
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif = m_get_rcvif(m, &s);
|
2017-02-07 05:38:08 +03:00
|
|
|
if (__predict_false(rcvif == NULL))
|
|
|
|
return;
|
2016-06-10 16:31:43 +03:00
|
|
|
if (in6_setscope(&sin6.sin6_addr, rcvif, NULL)) {
|
|
|
|
m_put_rcvif(rcvif, &s);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
return;
|
2016-06-10 16:31:43 +03:00
|
|
|
}
|
|
|
|
m_put_rcvif(rcvif, &s);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
|
2016-07-15 10:40:09 +03:00
|
|
|
rt = icmp6_mtudisc_clone(sin6tosa(&sin6));
|
2000-02-26 11:39:18 +03:00
|
|
|
|
2002-05-31 08:26:19 +04:00
|
|
|
if (rt && (rt->rt_flags & RTF_HOST) &&
|
|
|
|
!(rt->rt_rmx.rmx_locks & RTV_MTU) &&
|
|
|
|
(rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
|
2020-06-12 14:04:44 +03:00
|
|
|
if (mtu < rt->rt_ifp->if_mtu) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_PMTUCHG);
|
2000-02-26 11:39:18 +03:00
|
|
|
rt->rt_rmx.rmx_mtu = mtu;
|
|
|
|
}
|
|
|
|
}
|
2014-06-06 05:02:47 +04:00
|
|
|
if (rt) {
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such rtaloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a h
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
2001-10-15 15:12:44 +04:00
|
|
|
}
|
2000-10-19 01:14:12 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Notify protocols that the MTU for this destination
|
|
|
|
* has changed.
|
|
|
|
*/
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_enter(&icmp6_mtx);
|
2000-10-19 01:14:12 +04:00
|
|
|
for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
|
|
|
|
mc = LIST_NEXT(mc, mc_list))
|
|
|
|
(*mc->mc_func)(&sin6.sin6_addr);
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_exit(&icmp6_mtx);
|
2000-02-26 11:39:18 +03:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
2000-08-03 18:31:04 +04:00
|
|
|
* Process a Node Information Query packet, based on
|
2000-11-11 03:46:36 +03:00
|
|
|
* draft-ietf-ipngwg-icmp-name-lookups-07.
|
2002-06-09 00:06:44 +04:00
|
|
|
*
|
2000-06-12 20:21:02 +04:00
|
|
|
* Spec incompatibilities:
|
|
|
|
* - IPv6 Subject address handling
|
|
|
|
* - IPv4 Subject address handling support missing
|
|
|
|
* - Proxy reply (answer even if it's not for me)
|
|
|
|
* - joins NI group address at in6_ifattach() time only, does not cope
|
|
|
|
* with hostname changes by sethostname(3)
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
|
|
|
/*
 * ni6_input -- handle a received ICMPv6 Node Information Query
 * (draft-ietf-ipngwg-icmp-name-lookups-07).
 *
 * m:   received packet; always consumed (freed, or its pkthdr recycled
 *      into the reply via m_move_pkthdr()).
 * off: byte offset of the icmp6_nodeinfo header within the packet.
 *
 * Returns an mbuf (chain) holding the reply for the caller to transmit,
 * or NULL when the query is malformed, refused by configuration, or a
 * resource allocation fails (the input mbuf is freed in every case).
 */
static struct mbuf *
ni6_input(struct mbuf *m, int off)
{
	struct icmp6_nodeinfo *ni6, *nni6;
	struct mbuf *n = NULL;
	u_int16_t qtype;
	int subjlen;
	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
	struct ni_reply_fqdn *fqdn;
	int addrs;		/* for NI_QTYPE_NODEADDR */
	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
	struct sockaddr_in6 sin6; /* ip6_dst */
	struct in6_addr in6_subj; /* subject address */
	struct ip6_hdr *ip6;
	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
	char *subj = NULL;
	struct ifnet *rcvif;
	int s, ss;
	struct ifaddr *ifa;
	struct psref psref;

	ip6 = mtod(m, struct ip6_hdr *);
	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
	if (ni6 == NULL) {
		/* m is already reclaimed */
		return NULL;
	}
	KASSERT((m->m_flags & M_PKTHDR) != 0);

	/*
	 * Validate IPv6 destination address.
	 *
	 * The Responder must discard the Query without further processing
	 * unless it is one of the Responder's unicast or anycast addresses, or
	 * a link-local scope multicast address which the Responder has joined.
	 * [icmp-name-lookups-07, Section 4.]
	 */
	sockaddr_in6_init(&sin6, &ip6->ip6_dst, 0, 0, 0);
	/* XXX scopeid */
	ss = pserialize_read_enter();
	ifa = ifa_ifwithaddr(sin6tosa(&sin6));
	if (ifa != NULL) {
		; /* unicast/anycast, fine */
	} else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) {
		; /* link-local multicast, fine */
	} else {
		/* must exit the read section on every path out of it */
		pserialize_read_exit(ss);
		goto bad;
	}
	pserialize_read_exit(ss);

	/* validate query Subject field. */
	qtype = ntohs(ni6->ni_qtype);
	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
	switch (qtype) {
	case NI_QTYPE_NOOP:
	case NI_QTYPE_SUPTYPES:
		/* 07 draft */
		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
			break;
		/* FALLTHROUGH */
	case NI_QTYPE_FQDN:
	case NI_QTYPE_NODEADDR:
	case NI_QTYPE_IPV4ADDR:
		switch (ni6->ni_code) {
		case ICMP6_NI_SUBJ_IPV6:
#if ICMP6_NI_SUBJ_IPV6 != 0
		case 0:
#endif
			/*
			 * backward compatibility - try to accept 03 draft
			 * format, where no Subject is present.
			 */
			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
			    subjlen == 0) {
				oldfqdn++;
				break;
			}
#if ICMP6_NI_SUBJ_IPV6 != 0
			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
				goto bad;
#endif

			if (subjlen != sizeof(sin6.sin6_addr))
				goto bad;

			/*
			 * Validate Subject address.
			 *
			 * Not sure what exactly "address belongs to the node"
			 * means in the spec, is it just unicast, or what?
			 *
			 * At this moment we consider Subject address as
			 * "belong to the node" if the Subject address equals
			 * to the IPv6 destination address; validation for
			 * IPv6 destination address should have done enough
			 * check for us.
			 *
			 * We do not do proxy at this moment.
			 */
			/* m_pulldown instead of copy? */
			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
			    subjlen, (void *)&in6_subj);
			rcvif = m_get_rcvif(m, &s);
			if (__predict_false(rcvif == NULL))
				goto bad;
			if (in6_setscope(&in6_subj, rcvif, NULL)) {
				/* release the rcvif reference before bailing */
				m_put_rcvif(rcvif, &s);
				goto bad;
			}
			m_put_rcvif(rcvif, &s);

			subj = (char *)&in6_subj;
			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
				break;

			/*
			 * XXX if we are to allow other cases, we should really
			 * be careful about scope here.
			 * basically, we should disallow queries toward IPv6
			 * destination X with subject Y, if scope(X) > scope(Y).
			 * if we allow scope(X) > scope(Y), it will result in
			 * information leakage across scope boundary.
			 */
			goto bad;

		case ICMP6_NI_SUBJ_FQDN:
			/*
			 * Validate Subject name with gethostname(3).
			 *
			 * The behavior may need some debate, since:
			 * - we are not sure if the node has FQDN as
			 *   hostname (returned by gethostname(3)).
			 * - the code does wildcard match for truncated names.
			 *   however, we are not sure if we want to perform
			 *   wildcard match, if gethostname(3) side has
			 *   truncated hostname.
			 */
			n = ni6_nametodns(hostname, hostnamelen, 0);
			if (!n || n->m_next || n->m_len == 0)
				goto bad;
			IP6_EXTHDR_GET(subj, char *, m,
			    off + sizeof(struct icmp6_nodeinfo), subjlen);
			if (subj == NULL)
				goto bad;
			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
			    n->m_len)) {
				goto bad;
			}
			/* temporary DNS-encoded hostname no longer needed */
			m_freem(n);
			n = NULL;
			break;

		case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */
		default:
			goto bad;
		}
		break;
	}

	/* refuse based on configuration. XXX ICMP6_NI_REFUSED? */
	switch (qtype) {
	case NI_QTYPE_FQDN:
		/* icmp6_nodeinfo bit 0 enables FQDN replies */
		if ((icmp6_nodeinfo & 1) == 0)
			goto bad;
		break;
	case NI_QTYPE_NODEADDR:
	case NI_QTYPE_IPV4ADDR:
		/* icmp6_nodeinfo bit 1 enables address replies */
		if ((icmp6_nodeinfo & 2) == 0)
			goto bad;
		break;
	}

	/* guess reply length */
	switch (qtype) {
	case NI_QTYPE_NOOP:
		break; /* no reply data */
	case NI_QTYPE_SUPTYPES:
		replylen += sizeof(u_int32_t);
		break;
	case NI_QTYPE_FQDN:
		/* will append an mbuf */
		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
		break;
	case NI_QTYPE_NODEADDR:
		/* ni6_addrs takes a psref on *ifpp; released below or at bad: */
		addrs = ni6_addrs(ni6, &ifp, subj, &psref);
		replylen += addrs *
		    (sizeof(struct in6_addr) + sizeof(u_int32_t));
		if (replylen > MCLBYTES)
			replylen = MCLBYTES; /* XXX: will truncate pkt later */
		break;
	case NI_QTYPE_IPV4ADDR:
		/* unsupported - should respond with unknown Qtype? */
		goto bad;
	default:
		/*
		 * XXX: We must return a reply with the ICMP6 code
		 * `unknown Qtype' in this case. However we regard the case
		 * as an FQDN query for backward compatibility.
		 * Older versions set a random value to this field,
		 * so it rarely varies in the defined qtypes.
		 * But the mechanism is not reliable...
		 * maybe we should obsolete older versions.
		 */
		qtype = NI_QTYPE_FQDN;
		/* will append an mbuf */
		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
		oldfqdn++;
		break;
	}

	/* allocate an mbuf to reply. */
	MGETHDR(n, M_DONTWAIT, m->m_type);
	if (n == NULL) {
		goto bad;
	}
	/* steal the packet header (rcvif etc.) from the query mbuf */
	m_move_pkthdr(n, m);
	if (replylen > MHLEN) {
		if (replylen > MCLBYTES) {
			/*
			 * XXX: should we try to allocate more? But MCLBYTES
			 * is probably much larger than IPV6_MMTU...
			 */
			goto bad;
		}
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			goto bad;
		}
	}
	n->m_pkthdr.len = n->m_len = replylen;

	/* copy mbuf header and IPv6 + Node Information base headers */
	bcopy(mtod(m, void *), mtod(n, void *), sizeof(struct ip6_hdr));
	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
	bcopy((void *)ni6, (void *)nni6, sizeof(struct icmp6_nodeinfo));

	/* qtype dependent procedure */
	switch (qtype) {
	case NI_QTYPE_NOOP:
		nni6->ni_code = ICMP6_NI_SUCCESS;
		nni6->ni_flags = 0;
		break;
	case NI_QTYPE_SUPTYPES:
	{
		u_int32_t v;
		nni6->ni_code = ICMP6_NI_SUCCESS;
		nni6->ni_flags = htons(0x0000); /* raw bitmap */
		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
		v = (u_int32_t)htonl(0x0000000f);
		memcpy(nni6 + 1, &v, sizeof(u_int32_t));
		break;
	}
	case NI_QTYPE_FQDN:
		nni6->ni_code = ICMP6_NI_SUCCESS;
		fqdn = (struct ni_reply_fqdn *)(mtod(n, char *) +
		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
		nni6->ni_flags = 0; /* XXX: meaningless TTL */
		fqdn->ni_fqdn_ttl = 0; /* ditto. */
		/*
		 * XXX do we really have FQDN in variable "hostname"?
		 */
		n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn);
		if (n->m_next == NULL)
			goto bad;
		/* XXX we assume that n->m_next is not a chain */
		if (n->m_next->m_next != NULL)
			goto bad;
		n->m_pkthdr.len += n->m_next->m_len;
		break;
	case NI_QTYPE_NODEADDR:
	{
		int lenlim, copied;

		nni6->ni_code = ICMP6_NI_SUCCESS;
		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
		    sizeof(struct icmp6_nodeinfo);
		lenlim = M_TRAILINGSPACE(n);
		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
		/* drop the psref taken by ni6_addrs() above */
		if_put(ifp, &psref);
		ifp = NULL;
		/* update mbuf length */
		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
		    sizeof(struct icmp6_nodeinfo) + copied;
		break;
	}
	default:
		panic("%s: impossible", __func__);
		break;
	}

	nni6->ni_type = ICMP6_NI_REPLY;
	m_freem(m);
	return n;

  bad:
	/* if_put(NULL, ...) is safe when no NODEADDR psref was taken */
	if_put(ifp, &psref);
	m_freem(m);
	if (n)
		m_freem(n);
	return NULL;
}
|
|
|
|
|
2002-05-24 13:13:59 +04:00
|
|
|
/*
 * Minimal ASCII-only ctype replacements: <ctype.h> is not available in
 * the kernel, and DNS hostname rules are ASCII/locale-independent anyway.
 * Used below by ni6_nametodns() for RFC1035-style label validation and
 * lowercasing.  Arguments are evaluated more than once - pass only
 * side-effect-free expressions.
 */
#define isupper(x) ('A' <= (x) && (x) <= 'Z')
#define isalpha(x) (('A' <= (x) && (x) <= 'Z') || ('a' <= (x) && (x) <= 'z'))
#define isalnum(x) (isalpha(x) || ('0' <= (x) && (x) <= '9'))
#define tolower(x) (isupper(x) ? (x) + 'a' - 'A' : (x))
|
|
|
|
|
2000-06-12 20:21:02 +04:00
|
|
|
/*
|
|
|
|
* make a mbuf with DNS-encoded string. no compression support.
|
|
|
|
*
|
|
|
|
* XXX names with less than 2 dots (like "foo" or "foo.section") will be
|
|
|
|
* treated as truncated name (two \0 at the end). this is a wild guess.
|
2007-05-23 21:14:59 +04:00
|
|
|
*
|
|
|
|
* old - return pascal string if non-zero
|
2000-06-12 20:21:02 +04:00
|
|
|
*/
|
|
|
|
/*
 * ni6_nametodns -- build an mbuf holding "name" (a dotted hostname of
 * namelen bytes, not necessarily NUL-terminated) encoded as a DNS name.
 * No compression support.
 *
 * old != 0 selects the obsolete 03-draft wire format: a pascal string
 * (length byte followed by the raw name).  Otherwise the name is encoded
 * as RFC1035 length-prefixed labels, lowercased, with strict label
 * character checks; names with fewer than two dots are treated as
 * truncated and get a double-\0 terminator.
 *
 * Returns a single (non-chained) mbuf, or NULL on allocation failure or
 * invalid name.  The caller owns (and must free) the returned mbuf.
 */
static struct mbuf *
ni6_nametodns(const char *name, int namelen, int old)
{
	struct mbuf *m;
	char *cp, *ep;		/* write cursor and end of mbuf space */
	const char *p, *q;
	int i, len, nterm;

	if (old)
		len = namelen + 1;
	else
		len = MCLBYTES;

	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
	MGET(m, M_DONTWAIT, MT_DATA);
	if (m && len > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0)
			goto fail;
	}
	if (!m)
		goto fail;
	m->m_next = NULL;

	if (old) {
		/* 03-draft: pascal string, no validation of the content */
		m->m_len = len;
		*mtod(m, char *) = namelen;
		memcpy(mtod(m, char *) + 1, name, namelen);
		return m;
	} else {
		m->m_len = 0;
		cp = mtod(m, char *);
		ep = mtod(m, char *) + M_TRAILINGSPACE(m);

		/* if not certain about my name, return empty buffer */
		if (namelen == 0)
			return m;

		/*
		 * guess if it looks like shortened hostname, or FQDN.
		 * shortened hostname needs two trailing "\0".
		 */
		i = 0;
		for (p = name; p < name + namelen; p++) {
			if (*p == '.')
				i++;
		}
		if (i < 2)
			nterm = 2;
		else
			nterm = 1;

		/* encode one dot-separated label per iteration */
		p = name;
		while (cp < ep && p < name + namelen) {
			i = 0;
			for (q = p; q < name + namelen && *q && *q != '.'; q++)
				i++;
			/* result does not fit into mbuf */
			if (cp + i + 1 >= ep)
				goto fail;
			/*
			 * DNS label length restriction, RFC1035 page 8.
			 * "i == 0" case is included here to avoid returning
			 * 0-length label on "foo..bar".
			 */
			if (i <= 0 || i >= 64)
				goto fail;
			*cp++ = i;
			/* label must start with a letter, end alphanumeric */
			if (!isalpha(p[0]) || !isalnum(p[i - 1]))
				goto fail;
			while (i > 0) {
				/* interior chars: alphanumeric or hyphen */
				if (!isalnum(*p) && *p != '-')
					goto fail;
				if (isupper(*p)) {
					*cp++ = tolower(*p);
					p++;
				} else
					*cp++ = *p++;
				i--;
			}
			p = q;
			if (p < name + namelen && *p == '.')
				p++;
		}
		/* termination */
		if (cp + nterm >= ep)
			goto fail;
		while (nterm-- > 0)
			*cp++ = '\0';
		m->m_len = cp - mtod(m, char *);
		return m;
	}

	/* both branches above return; kept as a guard against future edits */
	panic("should not reach here");
	/* NOTREACHED */

  fail:
	if (m)
		m_freem(m);
	return NULL;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check if two DNS-encoded string matches. takes care of truncated
|
|
|
|
* form (with \0\0 at the end). no compression support.
|
2000-08-03 18:31:04 +04:00
|
|
|
* XXX upper/lowercase match (see RFC2065)
|
2000-06-12 20:21:02 +04:00
|
|
|
*/
|
|
|
|
/*
 * Compare two DNS-encoded names for equality.  A name consisting of
 * length-prefixed labels ending in "\0\0" is the truncated form and
 * matches any name sharing its labels as a prefix.  Name compression
 * is not supported.  Returns 1 on match, 0 otherwise.
 */
static int
ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
{
	const char *pa, *pb;		/* walk cursors into a and b */
	const char *enda, *endb;	/* one past the last label byte */
	int lablen;

	/* Byte-identical encodings trivially match. */
	if (alen == blen && memcmp(a, b, alen) == 0)
		return 1;

	/* Both names must carry at least one byte plus a NUL terminator. */
	if (alen < 2 || blen < 2)
		return 0;
	if (a[alen - 1] != '\0' || b[blen - 1] != '\0')
		return 0;

	/* Exclude the mandatory trailing NUL from the walk. */
	enda = a + alen - 1;
	endb = b + blen - 1;

	for (pa = a, pb = b; pa < enda && pb < endb; /* advanced below */) {
		if (pa + 1 > enda || pb + 1 > endb)
			return 0;

		/* A set high bit would be a compression pointer. */
		if ((signed char)pa[0] < 0 || (signed char)pb[0] < 0)
			return 0;
		/* we don't support compression yet */
		if (pa[0] >= 64 || pb[0] >= 64)
			return 0;

		/* Truncated name ("\0\0" tail) matches any remainder. */
		if (pa[0] == 0 && pa == enda - 1)
			return 1;
		if (pb[0] == 0 && pb == endb - 1)
			return 1;
		if (pa[0] == 0 || pb[0] == 0)
			return 0;

		/* Labels must agree in both length and content. */
		if (pa[0] != pb[0])
			return 0;
		lablen = pa[0];
		if (pa + 1 + lablen > enda || pb + 1 + lablen > endb)
			return 0;
		if (memcmp(pa + 1, pb + 1, lablen) != 0)
			return 0;

		pa += 1 + lablen;
		pb += 1 + lablen;
	}

	/* Match only if both names were consumed completely. */
	return (pa == enda && pb == endb) ? 1 : 0;
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* calculate the number of addresses to be returned in the node info reply.
|
|
|
|
*/
|
|
|
|
static int
|
2018-01-23 13:55:38 +03:00
|
|
|
ni6_addrs(struct icmp6_nodeinfo *ni6, struct ifnet **ifpp, char *subj,
|
|
|
|
struct psref *psref)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2001-02-07 11:59:47 +03:00
|
|
|
struct ifnet *ifp;
|
2016-07-05 06:40:52 +03:00
|
|
|
struct in6_ifaddr *ia6;
|
2001-02-07 11:59:47 +03:00
|
|
|
struct ifaddr *ifa;
|
2000-11-11 03:46:36 +03:00
|
|
|
struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
|
1999-06-28 10:36:47 +04:00
|
|
|
int addrs = 0, addrsofif, iffound = 0;
|
2000-11-11 03:46:36 +03:00
|
|
|
int niflags = ni6->ni_flags;
|
2016-08-01 06:15:30 +03:00
|
|
|
int s;
|
2000-11-11 03:46:36 +03:00
|
|
|
|
|
|
|
if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
|
2001-02-10 07:14:26 +03:00
|
|
|
switch (ni6->ni_code) {
|
2000-11-11 03:46:36 +03:00
|
|
|
case ICMP6_NI_SUBJ_IPV6:
|
|
|
|
if (subj == NULL) /* must be impossible... */
|
2018-04-14 17:59:58 +03:00
|
|
|
return 0;
|
2000-11-11 03:46:36 +03:00
|
|
|
subj_ip6 = (struct sockaddr_in6 *)subj;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* XXX: we only support IPv6 subject address for
|
|
|
|
* this Qtype.
|
|
|
|
*/
|
2018-04-14 17:59:58 +03:00
|
|
|
return 0;
|
2000-11-11 03:46:36 +03:00
|
|
|
}
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-08-01 06:15:30 +03:00
|
|
|
s = pserialize_read_enter();
|
2016-05-12 05:24:16 +03:00
|
|
|
IFNET_READER_FOREACH(ifp) {
|
1999-06-28 10:36:47 +04:00
|
|
|
addrsofif = 0;
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, ifp) {
|
1999-06-28 10:36:47 +04:00
|
|
|
if (ifa->ifa_addr->sa_family != AF_INET6)
|
|
|
|
continue;
|
2016-07-05 06:40:52 +03:00
|
|
|
ia6 = (struct in6_ifaddr *)ifa;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2000-11-11 03:46:36 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
|
|
|
|
IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
|
2018-04-14 17:59:58 +03:00
|
|
|
&ia6->ia_addr.sin6_addr))
|
1999-06-28 10:36:47 +04:00
|
|
|
iffound = 1;
|
|
|
|
|
2000-02-28 16:48:50 +03:00
|
|
|
/*
|
|
|
|
* IPv4-mapped addresses can only be returned by a
|
|
|
|
* Node Information proxy, since they represent
|
|
|
|
* addresses of IPv4-only nodes, which perforce do
|
|
|
|
* not implement this protocol.
|
2000-11-11 03:46:36 +03:00
|
|
|
* [icmp-name-lookups-07, Section 5.4]
|
2000-02-28 16:48:50 +03:00
|
|
|
* So we don't support NI_NODEADDR_FLAG_COMPAT in
|
|
|
|
* this function at this moment.
|
|
|
|
*/
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/* What do we have to do about ::1? */
|
2016-07-05 06:40:52 +03:00
|
|
|
switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) {
|
2000-11-11 03:46:36 +03:00
|
|
|
case IPV6_ADDR_SCOPE_LINKLOCAL:
|
2001-02-07 11:59:47 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
2000-11-11 03:46:36 +03:00
|
|
|
case IPV6_ADDR_SCOPE_SITELOCAL:
|
2001-02-07 11:59:47 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
continue;
|
|
|
|
break;
|
|
|
|
case IPV6_ADDR_SCOPE_GLOBAL:
|
2001-02-07 11:59:47 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
2000-11-11 03:46:36 +03:00
|
|
|
default:
|
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2000-11-11 03:46:36 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* check if anycast is okay.
|
2001-10-15 15:12:44 +04:00
|
|
|
* XXX: just experimental. not in the spec.
|
2000-11-11 03:46:36 +03:00
|
|
|
*/
|
2016-07-05 06:40:52 +03:00
|
|
|
if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
|
2000-11-11 03:46:36 +03:00
|
|
|
(niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
|
|
|
|
continue; /* we need only unicast addresses */
|
|
|
|
|
|
|
|
addrsofif++; /* count the address */
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
if (iffound) {
|
2017-02-17 06:57:17 +03:00
|
|
|
if_acquire(ifp, psref);
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
1999-06-28 10:36:47 +04:00
|
|
|
*ifpp = ifp;
|
2018-04-14 17:59:58 +03:00
|
|
|
return addrsofif;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
addrs += addrsofif;
|
|
|
|
}
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2018-04-14 17:59:58 +03:00
|
|
|
return addrs;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2018-01-23 13:55:38 +03:00
|
|
|
ni6_store_addrs(struct icmp6_nodeinfo *ni6,
|
2007-05-23 21:14:59 +04:00
|
|
|
struct icmp6_nodeinfo *nni6, struct ifnet *ifp0,
|
|
|
|
int resid)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2016-08-01 06:15:30 +03:00
|
|
|
struct ifnet *ifp;
|
2016-07-05 06:40:52 +03:00
|
|
|
struct in6_ifaddr *ia6;
|
2001-02-07 11:59:47 +03:00
|
|
|
struct ifaddr *ifa;
|
2000-11-11 03:46:36 +03:00
|
|
|
struct ifnet *ifp_dep = NULL;
|
|
|
|
int copied = 0, allow_deprecated = 0;
|
1999-06-28 10:36:47 +04:00
|
|
|
u_char *cp = (u_char *)(nni6 + 1);
|
2000-11-11 03:46:36 +03:00
|
|
|
int niflags = ni6->ni_flags;
|
|
|
|
u_int32_t ltime;
|
2016-08-01 06:15:30 +03:00
|
|
|
int s;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2000-11-11 03:46:36 +03:00
|
|
|
if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
|
2018-04-14 17:59:58 +03:00
|
|
|
return 0; /* needless to copy */
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-08-01 06:15:30 +03:00
|
|
|
s = pserialize_read_enter();
|
|
|
|
ifp = ifp0 ? ifp0 : IFNET_READER_FIRST();
|
2018-01-23 13:55:38 +03:00
|
|
|
again:
|
2000-11-11 03:46:36 +03:00
|
|
|
|
2016-05-12 05:24:16 +03:00
|
|
|
for (; ifp; ifp = IFNET_READER_NEXT(ifp))
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, ifp) {
|
1999-06-28 10:36:47 +04:00
|
|
|
if (ifa->ifa_addr->sa_family != AF_INET6)
|
|
|
|
continue;
|
2016-07-05 06:40:52 +03:00
|
|
|
ia6 = (struct in6_ifaddr *)ifa;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-07-05 06:40:52 +03:00
|
|
|
if ((ia6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
|
2000-11-11 03:46:36 +03:00
|
|
|
allow_deprecated == 0) {
|
|
|
|
/*
|
|
|
|
* prefererred address should be put before
|
|
|
|
* deprecated addresses.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* record the interface for later search */
|
|
|
|
if (ifp_dep == NULL)
|
|
|
|
ifp_dep = ifp;
|
|
|
|
|
|
|
|
continue;
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
2016-07-05 06:40:52 +03:00
|
|
|
else if ((ia6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
|
2000-11-11 03:46:36 +03:00
|
|
|
allow_deprecated != 0)
|
|
|
|
continue; /* we now collect deprecated addrs */
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* What do we have to do about ::1? */
|
2016-07-05 06:40:52 +03:00
|
|
|
switch (in6_addrscope(&ia6->ia_addr.sin6_addr)) {
|
2000-11-11 03:46:36 +03:00
|
|
|
case IPV6_ADDR_SCOPE_LINKLOCAL:
|
2001-02-07 11:59:47 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
2000-11-11 03:46:36 +03:00
|
|
|
case IPV6_ADDR_SCOPE_SITELOCAL:
|
2001-02-07 11:59:47 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
continue;
|
|
|
|
break;
|
|
|
|
case IPV6_ADDR_SCOPE_GLOBAL:
|
2001-02-07 11:59:47 +03:00
|
|
|
if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
break;
|
2000-11-11 03:46:36 +03:00
|
|
|
default:
|
|
|
|
continue;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2000-11-11 03:46:36 +03:00
|
|
|
/*
|
|
|
|
* check if anycast is okay.
|
2001-10-15 15:12:44 +04:00
|
|
|
* XXX: just experimental. not in the spec.
|
2000-11-11 03:46:36 +03:00
|
|
|
*/
|
2016-07-05 06:40:52 +03:00
|
|
|
if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
|
2000-11-11 03:46:36 +03:00
|
|
|
(niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* now we can copy the address */
|
|
|
|
if (resid < sizeof(struct in6_addr) +
|
|
|
|
sizeof(u_int32_t)) {
|
|
|
|
/*
|
|
|
|
* We give up much more copy.
|
|
|
|
* Set the truncate flag and return.
|
|
|
|
*/
|
2006-03-06 02:47:08 +03:00
|
|
|
nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
|
2016-08-01 06:15:30 +03:00
|
|
|
goto out;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2000-11-11 03:46:36 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the TTL of the address.
|
|
|
|
* The TTL value should be one of the following
|
|
|
|
* according to the specification:
|
|
|
|
*
|
|
|
|
* 1. The remaining lifetime of a DHCP lease on the
|
|
|
|
* address, or
|
|
|
|
* 2. The remaining Valid Lifetime of a prefix from
|
|
|
|
* which the address was derived through Stateless
|
|
|
|
* Autoconfiguration.
|
|
|
|
*
|
|
|
|
* Note that we currently do not support stateful
|
|
|
|
* address configuration by DHCPv6, so the former
|
|
|
|
* case can't happen.
|
2001-10-15 15:12:44 +04:00
|
|
|
*
|
|
|
|
* TTL must be 2^31 > TTL >= 0.
|
2000-11-11 03:46:36 +03:00
|
|
|
*/
|
2016-07-05 06:40:52 +03:00
|
|
|
if (ia6->ia6_lifetime.ia6t_expire == 0)
|
2000-11-11 03:46:36 +03:00
|
|
|
ltime = ND6_INFINITE_LIFETIME;
|
|
|
|
else {
|
2016-07-05 06:40:52 +03:00
|
|
|
if (ia6->ia6_lifetime.ia6t_expire >
|
2015-08-07 11:11:33 +03:00
|
|
|
time_uptime)
|
2016-07-05 06:40:52 +03:00
|
|
|
ltime = ia6->ia6_lifetime.ia6t_expire -
|
2015-08-07 11:11:33 +03:00
|
|
|
time_uptime;
|
2000-11-11 03:46:36 +03:00
|
|
|
else
|
|
|
|
ltime = 0;
|
|
|
|
}
|
2001-10-15 15:12:44 +04:00
|
|
|
if (ltime > 0x7fffffff)
|
|
|
|
ltime = 0x7fffffff;
|
|
|
|
ltime = htonl(ltime);
|
2002-06-09 18:43:10 +04:00
|
|
|
|
2018-02-12 15:52:12 +03:00
|
|
|
memcpy(cp, <ime, sizeof(u_int32_t));
|
2000-11-11 03:46:36 +03:00
|
|
|
cp += sizeof(u_int32_t);
|
|
|
|
|
|
|
|
/* copy the address itself */
|
2016-07-05 06:40:52 +03:00
|
|
|
bcopy(&ia6->ia_addr.sin6_addr, cp,
|
2000-11-11 03:46:36 +03:00
|
|
|
sizeof(struct in6_addr));
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
in6_clearscope((struct in6_addr *)cp); /* XXX */
|
2000-11-11 03:46:36 +03:00
|
|
|
cp += sizeof(struct in6_addr);
|
2002-06-09 18:43:10 +04:00
|
|
|
|
2000-11-11 03:46:36 +03:00
|
|
|
resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
|
2006-03-06 02:47:08 +03:00
|
|
|
copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
if (ifp0) /* we need search only on the specified IF */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2000-11-11 03:46:36 +03:00
|
|
|
if (allow_deprecated == 0 && ifp_dep != NULL) {
|
|
|
|
ifp = ifp_dep;
|
|
|
|
allow_deprecated = 1;
|
|
|
|
|
|
|
|
goto again;
|
|
|
|
}
|
2016-08-01 06:15:30 +03:00
|
|
|
out:
|
|
|
|
pserialize_read_exit(s);
|
2018-04-14 17:59:58 +03:00
|
|
|
return copied;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Deliver an incoming ICMPv6 packet to every matching raw IPv6 socket.
 * Returns IPPROTO_DONE in all cases; the mbuf is either handed to the
 * socket buffers (via copies) or freed here.
 *
 * XXX almost dup'ed code with rip6_input.
 */
static int
icmp6_rip6_input(struct mbuf **mp, int off)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct inpcb_hdr *inph;
	struct in6pcb *in6p;
	struct in6pcb *last = NULL;	/* last PCB that matched so far */
	struct sockaddr_in6 rip6src;
	struct icmp6_hdr *icmp6;
	struct mbuf *n, *opts = NULL;

	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
	if (icmp6 == NULL) {
		/* m is already reclaimed */
		return IPPROTO_DONE;
	}

	/*
	 * XXX: the address may have embedded scope zone ID, which should be
	 * hidden from applications.
	 */
	sockaddr_in6_init(&rip6src, &ip6->ip6_src, 0, 0, 0);
	if (sa6_recoverscope(&rip6src)) {
		m_freem(m);
		return IPPROTO_DONE;
	}

	/*
	 * Walk every raw6 PCB; a packet may be delivered to several
	 * sockets.  Each matching PCB (except the final one) receives a
	 * copy; the final match receives the original mbuf after the loop.
	 */
	TAILQ_FOREACH(inph, &raw6cbtable.inpt_queue, inph_queue) {
		in6p = (struct in6pcb *)inph;
		if (in6p->in6p_af != AF_INET6)
			continue;
		if (in6p->in6p_ip6.ip6_nxt != IPPROTO_ICMPV6)
			continue;
		/* local address must be unbound or match the destination */
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
		    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
			continue;
		/* foreign address must be unbound or match the source */
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
		    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
			continue;
		/* honor the per-socket ICMPv6 type filter */
		if (in6p->in6p_icmp6filt &&
		    ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
		    in6p->in6p_icmp6filt))
			continue;

		/*
		 * Deliver a copy to the PREVIOUS match ("last"); the
		 * current match is remembered and served on the next
		 * iteration or after the loop.  The empty first branch
		 * keeps the #ifdef chain below well-formed.
		 */
		if (last == NULL) {
			;
		}
#ifdef IPSEC
		else if (ipsec_used && ipsec_in_reject(m, last)) {
			/* do not inject data into pcb */
		}
#endif
		else if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) {
			if (last->in6p_flags & IN6P_CONTROLOPTS)
				ip6_savecontrol(last, &opts, ip6, n);
			/* strip intermediate headers */
			m_adj(n, off);
			if (sbappendaddr(&last->in6p_socket->so_rcv,
			    sin6tosa(&rip6src), n, opts) == 0) {
				/* receive buffer full: drop the copy */
				soroverflow(last->in6p_socket);
				m_freem(n);
				if (opts)
					m_freem(opts);
			} else {
				sorwakeup(last->in6p_socket);
			}
			opts = NULL;
		}

		last = in6p;
	}

	/*
	 * The final matching PCB gets the original mbuf (no copy needed).
	 */
#ifdef IPSEC
	if (ipsec_used && last && ipsec_in_reject(m, last)) {
		m_freem(m);
		IP6_STATDEC(IP6_STAT_DELIVERED);
		/* do not inject data into pcb */
	} else
#endif
	if (last) {
		if (last->in6p_flags & IN6P_CONTROLOPTS)
			ip6_savecontrol(last, &opts, ip6, m);
		/* strip intermediate headers */
		m_adj(m, off);
		if (sbappendaddr(&last->in6p_socket->so_rcv,
		    sin6tosa(&rip6src), m, opts) == 0) {
			soroverflow(last->in6p_socket);
			m_freem(m);
			if (opts)
				m_freem(opts);
		} else {
			sorwakeup(last->in6p_socket);
		}
	} else {
		/* no listener: the packet was counted as delivered; undo */
		m_freem(m);
		IP6_STATDEC(IP6_STAT_DELIVERED);
	}
	return IPPROTO_DONE;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reflect the ip6 packet back to the source.
|
2000-01-02 19:31:17 +03:00
|
|
|
* OFF points to the icmp6 header, counted from the top of the mbuf.
|
2001-06-22 17:01:49 +04:00
|
|
|
*
|
|
|
|
* Note: RFC 1885 required that an echo reply should be truncated if it
|
|
|
|
* did not fit in with (return) path MTU, and KAME code supported the
|
|
|
|
* behavior. However, as a clarification after the RFC, this limitation
|
|
|
|
* was removed in a revised version of the spec, RFC 2463. We had kept the
|
|
|
|
* old behavior, with a (non-default) ifdef block, while the new version of
|
|
|
|
* the spec was an internet-draft status, and even after the new RFC was
|
|
|
|
* published. But it would rather make sense to clean the obsoleted part
|
|
|
|
* up, and to make the code simpler at this stage.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
2018-01-23 13:55:38 +03:00
|
|
|
static void
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
icmp6_reflect(struct mbuf *m, size_t off)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
1999-12-13 18:17:17 +03:00
|
|
|
struct ip6_hdr *ip6;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct icmp6_hdr *icmp6;
|
2007-10-29 19:54:42 +03:00
|
|
|
const struct in6_ifaddr *ia;
|
|
|
|
const struct ip6aux *ip6a;
|
1999-12-13 18:17:17 +03:00
|
|
|
int plen;
|
|
|
|
int type, code;
|
|
|
|
struct ifnet *outif = NULL;
|
2007-10-29 19:54:42 +03:00
|
|
|
struct in6_addr origdst;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *rcvif;
|
|
|
|
int s;
|
2016-10-31 07:16:25 +03:00
|
|
|
bool ip6_src_filled = false;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
/* too short to reflect */
|
|
|
|
if (off < sizeof(struct ip6_hdr)) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_DEBUG,
|
2001-02-07 11:59:47 +03:00
|
|
|
"sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
|
|
|
|
(u_long)off, (u_long)sizeof(struct ip6_hdr),
|
2016-04-01 11:12:00 +03:00
|
|
|
__FILE__, __LINE__);
|
1999-12-13 18:17:17 +03:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* If there are extra headers between IPv6 and ICMPv6, strip
|
|
|
|
* off that header first.
|
|
|
|
*/
|
2017-03-14 07:24:04 +03:00
|
|
|
CTASSERT(sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) <= MHLEN);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (off > sizeof(struct ip6_hdr)) {
|
|
|
|
size_t l;
|
|
|
|
struct ip6_hdr nip6;
|
|
|
|
|
|
|
|
l = off - sizeof(struct ip6_hdr);
|
2007-03-04 08:59:00 +03:00
|
|
|
m_copydata(m, 0, sizeof(nip6), (void *)&nip6);
|
1999-12-13 18:17:17 +03:00
|
|
|
m_adj(m, l);
|
|
|
|
l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
|
|
|
|
if (m->m_len < l) {
|
|
|
|
if ((m = m_pullup(m, l)) == NULL)
|
|
|
|
return;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2018-02-12 15:52:12 +03:00
|
|
|
memcpy(mtod(m, void *), (void *)&nip6, sizeof(nip6));
|
2018-01-23 10:02:57 +03:00
|
|
|
} else {
|
|
|
|
size_t l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (m->m_len < l) {
|
|
|
|
if ((m = m_pullup(m, l)) == NULL)
|
|
|
|
return;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
}
|
2018-01-23 10:02:57 +03:00
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
ip6->ip6_nxt = IPPROTO_ICMPV6;
|
1999-06-28 10:36:47 +04:00
|
|
|
icmp6 = (struct icmp6_hdr *)(ip6 + 1);
|
1999-12-13 18:17:17 +03:00
|
|
|
type = icmp6->icmp6_type; /* keep type for statistics */
|
|
|
|
code = icmp6->icmp6_code; /* ditto. */
|
1999-06-28 10:36:47 +04:00
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
origdst = ip6->ip6_dst;
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
|
|
|
* ip6_input() drops a packet if its src is multicast.
|
|
|
|
* So, the src is never multicast.
|
|
|
|
*/
|
|
|
|
ip6->ip6_dst = ip6->ip6_src;
|
|
|
|
|
|
|
|
/*
|
2001-10-15 15:12:44 +04:00
|
|
|
* If the incoming packet was addressed directly to us (i.e. unicast),
|
1999-06-28 10:36:47 +04:00
|
|
|
* use dst as the src for the reply.
|
2006-03-03 17:07:06 +03:00
|
|
|
* The IN6_IFF_NOTREADY case should be VERY rare, but is possible
|
2000-02-26 11:39:18 +03:00
|
|
|
* (for example) when we encounter an error while forwarding procedure
|
|
|
|
* destined to a duplicated address of ours.
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
* Note that ip6_getdstifaddr() may fail if we are in an error handling
|
|
|
|
* procedure of an outgoing packet of our own, in which case we need
|
|
|
|
* to search in the ifaddr list.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
2018-04-14 17:59:58 +03:00
|
|
|
if (IN6_IS_ADDR_MULTICAST(&origdst)) {
|
2007-10-29 19:54:42 +03:00
|
|
|
;
|
2018-04-14 17:59:58 +03:00
|
|
|
} else if ((ip6a = ip6_getdstifaddr(m)) != NULL) {
|
2007-10-29 19:54:42 +03:00
|
|
|
if ((ip6a->ip6a_flags &
|
2016-10-31 07:16:25 +03:00
|
|
|
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
|
|
|
|
ip6->ip6_src = ip6a->ip6a_src;
|
|
|
|
ip6_src_filled = true;
|
|
|
|
}
|
2007-10-29 19:54:42 +03:00
|
|
|
} else {
|
|
|
|
union {
|
|
|
|
struct sockaddr_in6 sin6;
|
|
|
|
struct sockaddr sa;
|
|
|
|
} u;
|
2016-08-01 06:15:30 +03:00
|
|
|
int _s;
|
|
|
|
struct ifaddr *ifa;
|
2007-10-29 19:54:42 +03:00
|
|
|
|
|
|
|
sockaddr_in6_init(&u.sin6, &origdst, 0, 0, 0);
|
|
|
|
|
2016-08-01 06:15:30 +03:00
|
|
|
_s = pserialize_read_enter();
|
|
|
|
ifa = ifa_ifwithaddr(&u.sa);
|
2007-10-29 19:54:42 +03:00
|
|
|
|
2016-08-01 06:15:30 +03:00
|
|
|
if (ifa != NULL) {
|
|
|
|
ia = ifatoia6(ifa);
|
|
|
|
if ((ia->ia6_flags &
|
2016-10-31 07:16:25 +03:00
|
|
|
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
|
|
|
|
ip6->ip6_src = ia->ia_addr.sin6_addr;
|
|
|
|
ip6_src_filled = true;
|
|
|
|
}
|
2016-08-01 06:15:30 +03:00
|
|
|
}
|
|
|
|
pserialize_read_exit(_s);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2016-10-31 07:16:25 +03:00
|
|
|
if (!ip6_src_filled) {
|
2001-02-08 19:07:39 +03:00
|
|
|
int e;
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
struct sockaddr_in6 sin6;
|
Eliminate address family-specific route caches (struct route, struct
route_in6, struct route_iso), replacing all caches with a struct
route.
The principle benefit of this change is that all of the protocol
families can benefit from route cache-invalidation, which is
necessary for correct routing. Route-cache invalidation fixes an
ancient PR, kern/3508, at long last; it fixes various other PRs,
also.
Discussions with and ideas from Joerg Sonnenberger influenced this
work tremendously. Of course, all design oversights and bugs are
mine.
DETAILS
1 I added to each address family a pool of sockaddrs. I have
introduced routines for allocating, copying, and duplicating,
and freeing sockaddrs:
struct sockaddr *sockaddr_alloc(sa_family_t af, int flags);
struct sockaddr *sockaddr_copy(struct sockaddr *dst,
const struct sockaddr *src);
struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags);
void sockaddr_free(struct sockaddr *sa);
sockaddr_alloc() returns either a sockaddr from the pool belonging
to the specified family, or NULL if the pool is exhausted. The
returned sockaddr has the right size for that family; sa_family
and sa_len fields are initialized to the family and sockaddr
length---e.g., sa_family = AF_INET and sa_len = sizeof(struct
sockaddr_in). sockaddr_free() puts the given sockaddr back into
its family's pool.
sockaddr_dup() and sockaddr_copy() work analogously to strdup()
and strcpy(), respectively. sockaddr_copy() KASSERTs that the
family of the destination and source sockaddrs are alike.
The 'flags' argumet for sockaddr_alloc() and sockaddr_dup() is
passed directly to pool_get(9).
2 I added routines for initializing sockaddrs in each address
family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(),
etc. They are fairly self-explanatory.
3 structs route_in6 and route_iso are no more. All protocol families
use struct route. I have changed the route cache, 'struct route',
so that it does not contain storage space for a sockaddr. Instead,
struct route points to a sockaddr coming from the pool the sockaddr
belongs to. I added a new method to struct route, rtcache_setdst(),
for setting the cache destination:
int rtcache_setdst(struct route *, const struct sockaddr *);
rtcache_setdst() returns 0 on success, or ENOMEM if no memory is
available to create the sockaddr storage.
It is now possible for rtcache_getdst() to return NULL if, say,
rtcache_setdst() failed. I check the return value for NULL
everywhere in the kernel.
4 Each routing domain (struct domain) has a list of live route
caches, dom_rtcache. rtflushall(sa_family_t af) looks up the
domain indicated by 'af', walks the domain's list of route caches
and invalidates each one.
2007-05-03 00:40:22 +04:00
|
|
|
struct route ro;
|
2001-02-08 19:07:39 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/*
|
2000-02-26 11:39:18 +03:00
|
|
|
* This case matches to multicasts, our anycast, or unicasts
|
2001-10-15 15:12:44 +04:00
|
|
|
* that we do not own. Select a source address based on the
|
2001-02-08 19:07:39 +03:00
|
|
|
* source address of the erroneous packet.
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
2007-10-24 10:37:20 +04:00
|
|
|
/* zone ID should be embedded */
|
|
|
|
sockaddr_in6_init(&sin6, &ip6->ip6_dst, 0, 0, 0);
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application does:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
|
2007-01-29 09:02:26 +03:00
|
|
|
memset(&ro, 0, sizeof(ro));
|
2016-10-31 07:16:25 +03:00
|
|
|
e = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, NULL, NULL,
|
|
|
|
&ip6->ip6_src);
|
Eliminate address family-specific route caches (struct route, struct
route_in6, struct route_iso), replacing all caches with a struct
route.
The principal benefit of this change is that all of the protocol
families can benefit from route cache-invalidation, which is
necessary for correct routing. Route-cache invalidation fixes an
ancient PR, kern/3508, at long last; it fixes various other PRs,
also.
Discussions with and ideas from Joerg Sonnenberger influenced this
work tremendously. Of course, all design oversights and bugs are
mine.
DETAILS
1 I added to each address family a pool of sockaddrs. I have
introduced routines for allocating, copying, and duplicating,
and freeing sockaddrs:
struct sockaddr *sockaddr_alloc(sa_family_t af, int flags);
struct sockaddr *sockaddr_copy(struct sockaddr *dst,
const struct sockaddr *src);
struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags);
void sockaddr_free(struct sockaddr *sa);
sockaddr_alloc() returns either a sockaddr from the pool belonging
to the specified family, or NULL if the pool is exhausted. The
returned sockaddr has the right size for that family; sa_family
and sa_len fields are initialized to the family and sockaddr
length---e.g., sa_family = AF_INET and sa_len = sizeof(struct
sockaddr_in). sockaddr_free() puts the given sockaddr back into
its family's pool.
sockaddr_dup() and sockaddr_copy() work analogously to strdup()
and strcpy(), respectively. sockaddr_copy() KASSERTs that the
family of the destination and source sockaddrs are alike.
The 'flags' argument for sockaddr_alloc() and sockaddr_dup() is
passed directly to pool_get(9).
2 I added routines for initializing sockaddrs in each address
family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(),
etc. They are fairly self-explanatory.
3 structs route_in6 and route_iso are no more. All protocol families
use struct route. I have changed the route cache, 'struct route',
so that it does not contain storage space for a sockaddr. Instead,
struct route points to a sockaddr coming from the pool the sockaddr
belongs to. I added a new method to struct route, rtcache_setdst(),
for setting the cache destination:
int rtcache_setdst(struct route *, const struct sockaddr *);
rtcache_setdst() returns 0 on success, or ENOMEM if no memory is
available to create the sockaddr storage.
It is now possible for rtcache_getdst() to return NULL if, say,
rtcache_setdst() failed. I check the return value for NULL
everywhere in the kernel.
4 Each routing domain (struct domain) has a list of live route
caches, dom_rtcache. rtflushall(sa_family_t af) looks up the
domain indicated by 'af', walks the domain's list of route caches
and invalidates each one.
2007-05-03 00:40:22 +04:00
|
|
|
rtcache_free(&ro);
|
2016-10-31 07:16:25 +03:00
|
|
|
if (e != 0) {
|
2017-01-16 10:33:36 +03:00
|
|
|
char ip6buf[INET6_ADDRSTRLEN];
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_DEBUG,
|
|
|
|
"source can't be determined: "
|
2001-02-08 19:07:39 +03:00
|
|
|
"dst=%s, error=%d\n",
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6buf, &sin6.sin6_addr), e);
|
2001-02-08 19:07:39 +03:00
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
ip6->ip6_flow = 0;
|
1999-12-15 09:28:43 +03:00
|
|
|
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
|
|
|
|
ip6->ip6_vfc |= IPV6_VERSION;
|
1999-06-28 10:36:47 +04:00
|
|
|
ip6->ip6_nxt = IPPROTO_ICMPV6;
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif = m_get_rcvif(m, &s);
|
|
|
|
if (rcvif) {
|
1999-06-28 10:36:47 +04:00
|
|
|
/* XXX: This may not be the outgoing interface */
|
2016-06-10 16:31:43 +03:00
|
|
|
ip6->ip6_hlim = ND_IFINFO(rcvif)->chlim;
|
2018-04-14 17:59:58 +03:00
|
|
|
} else {
|
2001-06-01 09:54:19 +04:00
|
|
|
ip6->ip6_hlim = ip6_defhlim;
|
2018-04-14 17:59:58 +03:00
|
|
|
}
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif(rcvif, &s);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2006-07-12 02:13:56 +04:00
|
|
|
m->m_pkthdr.csum_flags = 0;
|
1999-06-28 10:36:47 +04:00
|
|
|
icmp6->icmp6_cksum = 0;
|
|
|
|
icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
|
2018-04-14 17:59:58 +03:00
|
|
|
sizeof(struct ip6_hdr), plen);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/*
|
2001-10-15 15:12:44 +04:00
|
|
|
* XXX option handling
|
1999-06-28 10:36:47 +04:00
|
|
|
*/
|
|
|
|
|
|
|
|
m->m_flags &= ~(M_BCAST|M_MCAST);
|
|
|
|
|
2001-12-20 10:26:36 +03:00
|
|
|
/*
|
|
|
|
* To avoid a "too big" situation at an intermediate router
|
|
|
|
* and the path MTU discovery process, specify the IPV6_MINMTU flag.
|
|
|
|
* Note that only echo and node information replies are affected,
|
|
|
|
* since the length of ICMP6 errors is limited to the minimum MTU.
|
|
|
|
*/
|
2018-04-14 17:59:58 +03:00
|
|
|
if (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL, &outif) != 0 &&
|
|
|
|
outif)
|
2001-12-07 13:10:43 +03:00
|
|
|
icmp6_ifstat_inc(outif, ifs6_out_error);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (outif)
|
|
|
|
icmp6_ifoutstat_inc(outif, type, code);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
bad:
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
static const char *
|
2018-01-23 13:55:38 +03:00
|
|
|
icmp6_redirect_diag(char *buf, size_t buflen, struct in6_addr *src6,
|
|
|
|
struct in6_addr *dst6, struct in6_addr *tgt6)
|
1999-10-01 14:16:16 +04:00
|
|
|
{
|
2017-01-16 10:33:36 +03:00
|
|
|
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
|
|
|
|
char ip6buft[INET6_ADDRSTRLEN];
|
|
|
|
|
|
|
|
snprintf(buf, buflen, "(src=%s dst=%s tgt=%s)",
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6bufs, src6), IN6_PRINT(ip6bufd, dst6),
|
|
|
|
IN6_PRINT(ip6buft, tgt6));
|
1999-12-13 18:17:17 +03:00
|
|
|
return buf;
|
1999-10-01 14:16:16 +04:00
|
|
|
}
|
|
|
|
|
2018-01-23 13:55:38 +03:00
|
|
|
static void
|
2007-05-23 21:14:59 +04:00
|
|
|
icmp6_redirect_input(struct mbuf *m, int off)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *ifp;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
2000-02-26 11:39:18 +03:00
|
|
|
struct nd_redirect *nd_rd;
|
2018-04-14 20:55:47 +03:00
|
|
|
int icmp6len = m->m_pkthdr.len - off;
|
1999-06-28 10:36:47 +04:00
|
|
|
char *lladdr = NULL;
|
|
|
|
int lladdrlen = 0;
|
|
|
|
struct rtentry *rt = NULL;
|
|
|
|
int is_router;
|
|
|
|
int is_onlink;
|
|
|
|
struct in6_addr src6 = ip6->ip6_src;
|
2000-02-26 11:39:18 +03:00
|
|
|
struct in6_addr redtgt6;
|
|
|
|
struct in6_addr reddst6;
|
1999-06-28 10:36:47 +04:00
|
|
|
union nd_opts ndopts;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct psref psref;
|
2017-01-16 10:33:36 +03:00
|
|
|
char ip6buf[INET6_ADDRSTRLEN];
|
|
|
|
char diagbuf[256];
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
ifp = m_get_rcvif_psref(m, &psref);
|
2016-05-17 06:24:46 +03:00
|
|
|
if (ifp == NULL)
|
|
|
|
goto freeit;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* XXX if we are router, we don't update route by icmp6 redirect */
|
|
|
|
if (ip6_forwarding)
|
2000-02-26 11:39:18 +03:00
|
|
|
goto freeit;
|
1999-06-28 10:36:47 +04:00
|
|
|
if (!icmp6_rediraccept)
|
2000-02-26 11:39:18 +03:00
|
|
|
goto freeit;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2000-02-26 11:39:18 +03:00
|
|
|
IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
|
|
|
|
if (nd_rd == NULL) {
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif_psref(ifp, &psref);
|
2000-02-26 11:39:18 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
redtgt6 = nd_rd->nd_rd_target;
|
|
|
|
reddst6 = nd_rd->nd_rd_dst;
|
|
|
|
|
2016-05-17 06:27:02 +03:00
|
|
|
if (in6_setscope(&redtgt6, ifp, NULL) ||
|
|
|
|
in6_setscope(&reddst6, ifp, NULL)) {
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application does:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
goto freeit;
|
|
|
|
}
|
2000-02-28 17:30:36 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/* validation */
|
|
|
|
if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR,
|
|
|
|
"ICMP6 redirect sent from %s rejected; "
|
2017-01-16 18:44:46 +03:00
|
|
|
"must be from linklocal\n", IN6_PRINT(ip6buf, &src6));
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
if (ip6->ip6_hlim != 255) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR,
|
|
|
|
"ICMP6 redirect sent from %s rejected; "
|
|
|
|
"hlim=%d (must be 255)\n",
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6buf, &src6), ip6->ip6_hlim);
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2018-01-23 13:55:38 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
|
|
|
/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
|
|
|
|
struct sockaddr_in6 sin6;
|
|
|
|
struct in6_addr *gw6;
|
|
|
|
|
2007-10-24 10:37:20 +04:00
|
|
|
sockaddr_in6_init(&sin6, &reddst6, 0, 0, 0);
|
2016-07-15 10:40:09 +03:00
|
|
|
rt = rtalloc1(sin6tosa(&sin6), 0);
|
1999-06-28 10:36:47 +04:00
|
|
|
if (rt) {
|
2000-09-16 14:12:22 +04:00
|
|
|
if (rt->rt_gateway == NULL ||
|
|
|
|
rt->rt_gateway->sa_family != AF_INET6) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR,
|
2000-09-16 14:12:22 +04:00
|
|
|
"ICMP6 redirect rejected; no route "
|
|
|
|
"with inet6 gateway found for redirect dst: %s\n",
|
2017-01-16 10:33:36 +03:00
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwlock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such as rtalloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed.  For a
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
2000-09-16 14:12:22 +04:00
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
|
2009-03-18 18:14:29 +03:00
|
|
|
if (memcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR,
|
|
|
|
"ICMP6 redirect rejected; "
|
|
|
|
"not equal to gw-for-src=%s (must be same): %s\n",
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6buf, gw6),
|
2017-01-16 10:33:36 +03:00
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwlock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such as rtalloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed.  For a
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
} else {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR, "ICMP6 redirect rejected; "
|
|
|
|
"no route found for redirect dst: %s\n",
|
2017-01-16 10:33:36 +03:00
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwlock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such as rtalloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a h
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
1999-06-28 10:36:47 +04:00
|
|
|
rt = NULL;
|
|
|
|
}
|
2018-01-23 13:55:38 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR, "ICMP6 redirect rejected; "
|
|
|
|
"redirect dst must be unicast: %s\n",
|
2017-01-16 10:33:36 +03:00
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
is_router = is_onlink = 0;
|
|
|
|
if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
|
|
|
|
is_router = 1; /* router case */
|
2009-03-18 18:14:29 +03:00
|
|
|
if (memcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
|
1999-06-28 10:36:47 +04:00
|
|
|
is_onlink = 1; /* on-link destination case */
|
|
|
|
if (!is_router && !is_onlink) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_ERR, "ICMP6 redirect rejected; "
|
|
|
|
"neither router case nor onlink case: %s\n",
|
2017-01-16 10:33:36 +03:00
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
/* validation passed */
|
|
|
|
|
|
|
|
icmp6len -= sizeof(*nd_rd);
|
|
|
|
nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
|
|
|
|
if (nd6_options(&ndopts) < 0) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_INFO, "invalid ND option, rejected: %s\n",
|
2017-01-16 10:33:36 +03:00
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
2001-02-07 11:59:47 +03:00
|
|
|
/* nd6_options have incremented stats */
|
2000-02-26 11:39:18 +03:00
|
|
|
goto freeit;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ndopts.nd_opts_tgt_lladdr) {
|
|
|
|
lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
|
|
|
|
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
|
2016-04-01 11:12:00 +03:00
|
|
|
nd6log(LOG_INFO, "lladdrlen mismatch for %s "
|
|
|
|
"(if %d, icmp6 packet %d): %s\n",
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6buf, &redtgt6),
|
2017-01-16 10:33:36 +03:00
|
|
|
ifp->if_addrlen, lladdrlen - 2,
|
|
|
|
icmp6_redirect_diag(diagbuf, sizeof(diagbuf),
|
|
|
|
&src6, &reddst6, &redtgt6));
|
2001-02-07 11:59:47 +03:00
|
|
|
goto bad;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* RFC 2461 8.3 */
|
1999-07-31 22:41:15 +04:00
|
|
|
nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
|
2018-04-14 17:59:58 +03:00
|
|
|
is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif_psref(ifp, &psref);
|
|
|
|
ifp = NULL;
|
|
|
|
|
2001-10-15 15:12:44 +04:00
|
|
|
if (!is_onlink) { /* better router case. perform rtredirect. */
|
1999-06-28 10:36:47 +04:00
|
|
|
/* perform rtredirect */
|
|
|
|
struct sockaddr_in6 sdst;
|
|
|
|
struct sockaddr_in6 sgw;
|
|
|
|
struct sockaddr_in6 ssrc;
|
2001-02-08 19:07:39 +03:00
|
|
|
unsigned long rtcount;
|
|
|
|
struct rtentry *newrt = NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* do not install redirect route, if the number of entries
|
|
|
|
* is too much (> hiwat). note that, the node (= host) will
|
|
|
|
* work just fine even if we do not install redirect route
|
|
|
|
* (there will be additional hops, though).
|
|
|
|
*/
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_enter(&icmp6_mtx);
|
2001-02-08 19:07:39 +03:00
|
|
|
rtcount = rt_timer_count(icmp6_redirect_timeout_q);
|
2017-02-13 10:18:20 +03:00
|
|
|
if (0 <= ip6_maxdynroutes && rtcount >= ip6_maxdynroutes) {
|
|
|
|
mutex_exit(&icmp6_mtx);
|
2012-06-23 07:13:41 +04:00
|
|
|
goto freeit;
|
2017-02-13 10:18:20 +03:00
|
|
|
}
|
|
|
|
if (0 <= icmp6_redirect_hiwat && rtcount > icmp6_redirect_hiwat) {
|
|
|
|
mutex_exit(&icmp6_mtx);
|
2016-05-17 06:24:46 +03:00
|
|
|
goto freeit;
|
2017-02-13 10:18:20 +03:00
|
|
|
} else if (0 <= icmp6_redirect_lowat &&
|
2001-02-08 19:07:39 +03:00
|
|
|
rtcount > icmp6_redirect_lowat) {
|
|
|
|
/*
|
|
|
|
* XXX nuke a victim, install the new one.
|
|
|
|
*/
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&sdst, 0, sizeof(sdst));
|
|
|
|
memset(&sgw, 0, sizeof(sgw));
|
|
|
|
memset(&ssrc, 0, sizeof(ssrc));
|
1999-06-28 10:36:47 +04:00
|
|
|
sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
|
|
|
|
sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
|
2018-04-14 17:59:58 +03:00
|
|
|
sizeof(struct sockaddr_in6);
|
1999-06-28 10:36:47 +04:00
|
|
|
bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
|
|
|
|
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
|
|
|
|
bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
|
2016-07-15 10:40:09 +03:00
|
|
|
rtredirect(sin6tosa(&sdst), sin6tosa(&sgw), NULL,
|
2018-01-23 13:55:38 +03:00
|
|
|
RTF_GATEWAY | RTF_HOST, sin6tosa(&ssrc), &newrt);
|
2001-02-08 19:07:39 +03:00
|
|
|
|
|
|
|
if (newrt) {
|
|
|
|
(void)rt_timer_add(newrt, icmp6_redirect_timeout,
|
|
|
|
icmp6_redirect_timeout_q);
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such rtaloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a h
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(newrt);
|
2001-02-08 19:07:39 +03:00
|
|
|
}
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_exit(&icmp6_mtx);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
/* finally update cached route in each socket via pfctlinput */
|
2001-12-21 11:54:52 +03:00
|
|
|
{
|
|
|
|
struct sockaddr_in6 sdst;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2007-10-24 10:37:20 +04:00
|
|
|
sockaddr_in6_init(&sdst, &reddst6, 0, 0, 0);
|
2016-07-15 10:40:09 +03:00
|
|
|
pfctlinput(PRC_REDIRECT_HOST, sin6tosa(&sdst));
|
2013-06-05 23:01:26 +04:00
|
|
|
#if defined(IPSEC)
|
2014-05-30 05:39:03 +04:00
|
|
|
if (ipsec_used)
|
2016-07-15 10:40:09 +03:00
|
|
|
key_sa_routechange(sin6tosa(&sdst));
|
1999-06-28 10:36:47 +04:00
|
|
|
#endif
|
2001-12-21 11:54:52 +03:00
|
|
|
}
|
2000-02-26 11:39:18 +03:00
|
|
|
|
2018-01-23 13:55:38 +03:00
|
|
|
freeit:
|
2016-06-10 16:31:43 +03:00
|
|
|
if (ifp != NULL)
|
|
|
|
m_put_rcvif_psref(ifp, &psref);
|
2000-02-26 11:39:18 +03:00
|
|
|
m_freem(m);
|
2001-02-07 11:59:47 +03:00
|
|
|
return;
|
|
|
|
|
2018-01-23 13:55:38 +03:00
|
|
|
bad:
|
2016-06-10 16:31:43 +03:00
|
|
|
m_put_rcvif_psref(ifp, &psref);
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_BADREDIRECT);
|
2001-02-07 11:59:47 +03:00
|
|
|
m_freem(m);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2007-05-23 21:14:59 +04:00
|
|
|
icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
|
|
|
struct ifnet *ifp; /* my outgoing interface */
|
|
|
|
struct in6_addr *ifp_ll6;
|
2002-03-05 11:13:56 +03:00
|
|
|
struct in6_addr *nexthop;
|
1999-06-28 10:36:47 +04:00
|
|
|
struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */
|
|
|
|
struct mbuf *m = NULL; /* newly allocated one */
|
|
|
|
struct ip6_hdr *ip6; /* m as struct ip6_hdr */
|
|
|
|
struct nd_redirect *nd_rd;
|
|
|
|
size_t maxlen;
|
|
|
|
u_char *p;
|
2000-05-09 15:51:12 +04:00
|
|
|
struct sockaddr_in6 src_sa;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2008-04-15 07:57:04 +04:00
|
|
|
icmp6_errcount(ICMP6_STAT_OUTERRHIST, ND_REDIRECT, 0);
|
2000-07-06 16:36:18 +04:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/* if we are not router, we don't send icmp6 redirect */
|
2003-06-24 11:54:47 +04:00
|
|
|
if (!ip6_forwarding)
|
1999-06-28 10:36:47 +04:00
|
|
|
goto fail;
|
|
|
|
|
|
|
|
/* sanity check */
|
2016-12-11 10:35:42 +03:00
|
|
|
KASSERT(m0 != NULL);
|
|
|
|
KASSERT(rt != NULL);
|
|
|
|
|
|
|
|
ifp = rt->rt_ifp;
|
1999-07-06 16:23:19 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Address check:
|
|
|
|
* the source address must identify a neighbor, and
|
|
|
|
* the destination address must not be a multicast address
|
|
|
|
* [RFC 2461, sec 8.2]
|
|
|
|
*/
|
1999-06-28 10:36:47 +04:00
|
|
|
sip6 = mtod(m0, struct ip6_hdr *);
|
2007-10-24 10:37:20 +04:00
|
|
|
sockaddr_in6_init(&src_sa, &sip6->ip6_src, 0, 0, 0);
|
2000-05-09 15:51:12 +04:00
|
|
|
if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
|
1999-07-06 16:23:19 +04:00
|
|
|
goto fail;
|
1999-06-28 10:36:47 +04:00
|
|
|
if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
|
|
|
|
goto fail; /* what should we do here? */
|
|
|
|
|
|
|
|
/* rate limit */
|
|
|
|
if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Since we are going to append up to 1280 bytes (= IPV6_MMTU),
|
|
|
|
* we almost always ask for an mbuf cluster for simplicity.
|
|
|
|
* (MHLEN < IPV6_MMTU is almost always true)
|
|
|
|
*/
|
|
|
|
MGETHDR(m, M_DONTWAIT, MT_HEADER);
|
2009-10-13 02:32:23 +04:00
|
|
|
if (m && IPV6_MMTU >= MHLEN) {
|
|
|
|
#if IPV6_MMTU >= MCLBYTES
|
2018-04-27 12:02:16 +03:00
|
|
|
MEXTMALLOC(m, IPV6_MMTU, M_NOWAIT);
|
2009-10-13 02:32:23 +04:00
|
|
|
#else
|
2000-02-26 11:39:18 +03:00
|
|
|
MCLGET(m, M_DONTWAIT);
|
2009-10-13 02:32:23 +04:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
if (!m)
|
|
|
|
goto fail;
|
2016-06-10 16:27:10 +03:00
|
|
|
m_reset_rcvif(m);
|
2000-08-19 12:15:53 +04:00
|
|
|
m->m_len = 0;
|
|
|
|
maxlen = M_TRAILINGSPACE(m);
|
Rename min/max -> uimin/uimax for better honesty.
These functions are defined on unsigned int. The generic name
min/max should not silently truncate to 32 bits on 64-bit systems.
This is purely a name change -- no functional change intended.
HOWEVER! Some subsystems have
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
even though our standard name for that is MIN/MAX. Although these
may invite multiple evaluation bugs, these do _not_ cause integer
truncation.
To avoid `fixing' these cases, I first changed the name in libkern,
and then compile-tested every file where min/max occurred in order to
confirm that it failed -- and thus confirm that nothing shadowed
min/max -- before changing it.
I have left a handful of bootloaders that are too annoying to
compile-test, and some dead code:
cobalt ews4800mips hp300 hppa ia64 luna68k vax
acorn32/if_ie.c (not included in any kernels)
macppc/if_gm.c (superseded by gem(4))
It should be easy to fix the fallout once identified -- this way of
doing things fails safe, and the goal here, after all, is to _avoid_
silent integer truncations, not introduce them.
Maybe one day we can reintroduce min/max as type-generic things that
never silently truncate. But we should avoid doing that for a while,
so that existing code has a chance to be detected by the compiler for
conversion to uimin/uimax without changing the semantics until we can
properly audit it all. (Who knows, maybe in some cases integer
truncation is actually intended!)
2018-09-03 19:29:22 +03:00
|
|
|
maxlen = uimin(IPV6_MMTU, maxlen);
|
2018-01-23 13:46:59 +03:00
|
|
|
|
1999-06-28 10:36:47 +04:00
|
|
|
/* just for safety */
|
2018-01-23 13:46:59 +03:00
|
|
|
if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct nd_redirect) +
|
2000-02-26 11:39:18 +03:00
|
|
|
((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
|
1999-06-28 10:36:47 +04:00
|
|
|
goto fail;
|
2000-02-26 11:39:18 +03:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
{
|
|
|
|
/* get ip6 linklocal address for ifp(my outgoing interface). */
|
2000-02-26 11:39:18 +03:00
|
|
|
struct in6_ifaddr *ia;
|
2016-08-01 06:15:30 +03:00
|
|
|
int s = pserialize_read_enter();
|
2000-02-26 11:39:18 +03:00
|
|
|
if ((ia = in6ifa_ifpforlinklocal(ifp,
|
|
|
|
IN6_IFF_NOTREADY|
|
2016-08-01 06:15:30 +03:00
|
|
|
IN6_IFF_ANYCAST)) == NULL) {
|
|
|
|
pserialize_read_exit(s);
|
1999-06-28 10:36:47 +04:00
|
|
|
goto fail;
|
2016-08-01 06:15:30 +03:00
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
ifp_ll6 = &ia->ia_addr.sin6_addr;
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* get ip6 linklocal address for the router. */
|
|
|
|
if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
|
|
|
|
struct sockaddr_in6 *sin6;
|
|
|
|
sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
|
2002-03-05 11:13:56 +03:00
|
|
|
nexthop = &sin6->sin6_addr;
|
|
|
|
if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
|
|
|
|
nexthop = NULL;
|
1999-06-28 10:36:47 +04:00
|
|
|
} else
|
2002-03-05 11:13:56 +03:00
|
|
|
nexthop = NULL;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* ip6 */
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
ip6->ip6_flow = 0;
|
1999-12-15 09:28:43 +03:00
|
|
|
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
|
|
|
|
ip6->ip6_vfc |= IPV6_VERSION;
|
1999-06-28 10:36:47 +04:00
|
|
|
/* ip6->ip6_plen will be set later */
|
|
|
|
ip6->ip6_nxt = IPPROTO_ICMPV6;
|
|
|
|
ip6->ip6_hlim = 255;
|
|
|
|
/* ip6->ip6_src must be linklocal addr for my outgoing if. */
|
|
|
|
bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
|
|
|
|
bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
|
|
|
|
|
|
|
|
/* ND Redirect */
|
|
|
|
nd_rd = (struct nd_redirect *)(ip6 + 1);
|
|
|
|
nd_rd->nd_rd_type = ND_REDIRECT;
|
|
|
|
nd_rd->nd_rd_code = 0;
|
|
|
|
nd_rd->nd_rd_reserved = 0;
|
|
|
|
if (rt->rt_flags & RTF_GATEWAY) {
|
|
|
|
/*
|
|
|
|
* nd_rd->nd_rd_target must be a link-local address in
|
|
|
|
* better router cases.
|
|
|
|
*/
|
2002-03-05 11:13:56 +03:00
|
|
|
if (!nexthop)
|
1999-06-28 10:36:47 +04:00
|
|
|
goto fail;
|
2002-03-05 11:13:56 +03:00
|
|
|
bcopy(nexthop, &nd_rd->nd_rd_target,
|
1999-06-28 10:36:47 +04:00
|
|
|
sizeof(nd_rd->nd_rd_target));
|
|
|
|
bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
|
|
|
|
sizeof(nd_rd->nd_rd_dst));
|
|
|
|
} else {
|
|
|
|
/* make sure redtgt == reddst */
|
2002-03-05 11:13:56 +03:00
|
|
|
nexthop = &sip6->ip6_dst;
|
1999-06-28 10:36:47 +04:00
|
|
|
bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
|
|
|
|
sizeof(nd_rd->nd_rd_target));
|
|
|
|
bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
|
|
|
|
sizeof(nd_rd->nd_rd_dst));
|
|
|
|
}
|
|
|
|
|
|
|
|
p = (u_char *)(nd_rd + 1);
|
|
|
|
|
2002-03-05 11:13:56 +03:00
|
|
|
{
|
|
|
|
/* target lladdr option */
|
2016-04-04 10:37:07 +03:00
|
|
|
struct llentry *ln = NULL;
|
2018-01-23 13:32:50 +03:00
|
|
|
int len, pad;
|
2002-03-05 11:13:56 +03:00
|
|
|
struct nd_opt_hdr *nd_opt;
|
|
|
|
char *lladdr;
|
|
|
|
|
2016-04-04 10:37:07 +03:00
|
|
|
ln = nd6_lookup(nexthop, ifp, false);
|
|
|
|
if (ln == NULL)
|
2002-03-05 11:13:56 +03:00
|
|
|
goto nolladdropt;
|
|
|
|
len = sizeof(*nd_opt) + ifp->if_addrlen;
|
|
|
|
len = (len + 7) & ~7; /* round by 8 */
|
2018-01-23 13:32:50 +03:00
|
|
|
pad = len - (sizeof(*nd_opt) + ifp->if_addrlen);
|
|
|
|
|
2002-03-05 11:13:56 +03:00
|
|
|
/* safety check */
|
2015-07-17 05:21:08 +03:00
|
|
|
if (len + (p - (u_char *)ip6) > maxlen) {
|
2016-04-04 10:37:07 +03:00
|
|
|
LLE_RUNLOCK(ln);
|
2002-03-05 11:13:56 +03:00
|
|
|
goto nolladdropt;
|
2015-07-17 05:21:08 +03:00
|
|
|
}
|
2018-01-23 13:32:50 +03:00
|
|
|
|
2016-04-04 10:37:07 +03:00
|
|
|
if (ln->la_flags & LLE_VALID) {
|
2002-03-05 11:13:56 +03:00
|
|
|
nd_opt = (struct nd_opt_hdr *)p;
|
|
|
|
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
|
|
|
|
nd_opt->nd_opt_len = len >> 3;
|
|
|
|
lladdr = (char *)(nd_opt + 1);
|
2016-04-04 10:37:07 +03:00
|
|
|
memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
|
2018-01-23 13:32:50 +03:00
|
|
|
memset(lladdr + ifp->if_addrlen, 0, pad);
|
2002-03-05 11:13:56 +03:00
|
|
|
p += len;
|
|
|
|
}
|
2016-04-04 10:37:07 +03:00
|
|
|
LLE_RUNLOCK(ln);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2018-01-23 13:55:38 +03:00
|
|
|
nolladdropt:
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
|
|
|
|
|
|
|
|
/* just to be safe */
|
|
|
|
if (m0->m_flags & M_DECRYPTED)
|
|
|
|
goto noredhdropt;
|
2000-02-26 11:39:18 +03:00
|
|
|
if (p - (u_char *)ip6 > maxlen)
|
|
|
|
goto noredhdropt;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-06-03 09:20:06 +04:00
|
|
|
{
|
|
|
|
/* redirected header option */
|
|
|
|
int len;
|
|
|
|
struct nd_opt_rd_hdr *nd_opt_rh;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-06-03 09:20:06 +04:00
|
|
|
/*
|
|
|
|
* compute the maximum size for icmp6 redirect header option.
|
|
|
|
* XXX room for auth header?
|
|
|
|
*/
|
|
|
|
len = maxlen - (p - (u_char *)ip6);
|
|
|
|
len &= ~7;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2018-01-23 13:46:59 +03:00
|
|
|
if (len < sizeof(*nd_opt_rh)) {
|
|
|
|
goto noredhdropt;
|
|
|
|
}
|
|
|
|
|
2003-06-03 09:20:06 +04:00
|
|
|
/*
|
|
|
|
* Redirected header option spec (RFC2461 4.6.3) talks nothing
|
|
|
|
* about padding/truncate rule for the original IP packet.
|
|
|
|
* From the discussion on IPv6imp in Feb 1999,
|
|
|
|
* the consensus was:
|
|
|
|
* - "attach as much as possible" is the goal
|
|
|
|
* - pad if not aligned (original size can be guessed by
|
|
|
|
* original ip6 header)
|
|
|
|
* Following code adds the padding if it is simple enough,
|
|
|
|
* and truncates if not.
|
|
|
|
*/
|
|
|
|
if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
|
|
|
|
/* not enough room, truncate */
|
|
|
|
m_adj(m0, (len - sizeof(*nd_opt_rh)) -
|
|
|
|
m0->m_pkthdr.len);
|
|
|
|
} else {
|
|
|
|
/*
|
2006-03-06 02:47:08 +03:00
|
|
|
* enough room, truncate if not aligned.
|
2003-06-03 09:20:06 +04:00
|
|
|
* we don't pad here for simplicity.
|
|
|
|
*/
|
2018-01-23 13:55:38 +03:00
|
|
|
int extra;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-06-03 09:20:06 +04:00
|
|
|
extra = m0->m_pkthdr.len % 8;
|
|
|
|
if (extra) {
|
1999-06-28 10:36:47 +04:00
|
|
|
/* truncate */
|
2003-06-03 09:20:06 +04:00
|
|
|
m_adj(m0, -extra);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2003-06-03 09:20:06 +04:00
|
|
|
len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2003-06-03 09:20:06 +04:00
|
|
|
nd_opt_rh = (struct nd_opt_rd_hdr *)p;
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(nd_opt_rh, 0, sizeof(*nd_opt_rh));
|
2003-06-03 09:20:06 +04:00
|
|
|
nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
|
|
|
|
nd_opt_rh->nd_opt_rh_len = len >> 3;
|
|
|
|
p += sizeof(*nd_opt_rh);
|
|
|
|
m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-06-03 09:20:06 +04:00
|
|
|
/* connect m0 to m */
|
|
|
|
m->m_pkthdr.len += m0->m_pkthdr.len;
|
2003-08-06 18:47:32 +04:00
|
|
|
m_cat(m, m0);
|
2003-06-03 09:20:06 +04:00
|
|
|
m0 = NULL;
|
|
|
|
}
|
|
|
|
noredhdropt:
|
2003-04-01 03:55:46 +04:00
|
|
|
if (m0) {
|
|
|
|
m_freem(m0);
|
|
|
|
m0 = NULL;
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
Better support of IPv6 scoped addresses.
- most of the kernel code will not care about the actual encoding of
scope zone IDs and won't touch "s6_addr16[1]" directly.
- similarly, most of the kernel code will not care about link-local
scoped addresses as a special case.
- scope boundary check will be stricter. For example, the current
*BSD code allows a packet with src=::1 and dst=(some global IPv6
address) to be sent outside of the node, if the application do:
s = socket(AF_INET6);
bind(s, "::1");
sendto(s, some_global_IPv6_addr);
This is clearly wrong, since ::1 is only meaningful within a single
node, but the current implementation of the *BSD kernel cannot
reject this attempt.
- and, while there, don't try to remove the ff02::/32 interface route
entry in in6_ifdetach() as it's already gone.
This also includes some level of support for the standard source
address selection algorithm defined in RFC3484, which will be
completed on in the future.
From the KAME project via JINMEI Tatuya.
Approved by core@.
2006-01-21 03:15:35 +03:00
|
|
|
/* XXX: clear embedded link IDs in the inner header */
|
|
|
|
in6_clearscope(&sip6->ip6_src);
|
|
|
|
in6_clearscope(&sip6->ip6_dst);
|
|
|
|
in6_clearscope(&nd_rd->nd_rd_target);
|
|
|
|
in6_clearscope(&nd_rd->nd_rd_dst);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
|
|
|
|
|
|
|
|
nd_rd->nd_rd_cksum = 0;
|
2018-01-23 13:55:38 +03:00
|
|
|
nd_rd->nd_rd_cksum =
|
|
|
|
in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
/* send the packet to outside... */
|
2011-08-31 22:31:02 +04:00
|
|
|
if (ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL) != 0)
|
2001-12-07 13:10:43 +03:00
|
|
|
icmp6_ifstat_inc(ifp, ifs6_out_error);
|
|
|
|
|
2001-10-18 13:09:25 +04:00
|
|
|
icmp6_ifstat_inc(ifp, ifs6_out_msg);
|
|
|
|
icmp6_ifstat_inc(ifp, ifs6_out_redirect);
|
2008-04-15 07:57:04 +04:00
|
|
|
ICMP6_STATINC(ICMP6_STAT_OUTHIST + ND_REDIRECT);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
if (m)
|
|
|
|
m_freem(m);
|
|
|
|
if (m0)
|
|
|
|
m_freem(m0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ICMPv6 socket option processing.
|
|
|
|
*/
|
|
|
|
int
|
2008-08-06 19:01:23 +04:00
|
|
|
icmp6_ctloutput(int op, struct socket *so, struct sockopt *sopt)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
|
|
|
int error = 0;
|
2001-02-10 07:14:26 +03:00
|
|
|
struct in6pcb *in6p = sotoin6pcb(so);
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2008-08-06 19:01:23 +04:00
|
|
|
if (sopt->sopt_level != IPPROTO_ICMPV6)
|
|
|
|
return rip6_ctloutput(op, so, sopt);
|
2001-02-10 07:14:26 +03:00
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case PRCO_SETOPT:
|
2008-08-06 19:01:23 +04:00
|
|
|
switch (sopt->sopt_name) {
|
2001-02-10 07:14:26 +03:00
|
|
|
case ICMP6_FILTER:
|
|
|
|
{
|
2008-08-06 19:01:23 +04:00
|
|
|
struct icmp6_filter fil;
|
2001-02-10 07:14:26 +03:00
|
|
|
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_get(sopt, &fil, sizeof(fil));
|
|
|
|
if (error)
|
2001-02-10 07:14:26 +03:00
|
|
|
break;
|
2008-08-06 19:01:23 +04:00
|
|
|
memcpy(in6p->in6p_icmp6filt, &fil,
|
|
|
|
sizeof(struct icmp6_filter));
|
2001-02-10 07:14:26 +03:00
|
|
|
error = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
error = ENOPROTOOPT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRCO_GETOPT:
|
2008-08-06 19:01:23 +04:00
|
|
|
switch (sopt->sopt_name) {
|
2001-02-10 07:14:26 +03:00
|
|
|
case ICMP6_FILTER:
|
|
|
|
{
|
2008-08-06 19:01:23 +04:00
|
|
|
if (in6p->in6p_icmp6filt == NULL) {
|
2001-02-10 07:14:26 +03:00
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_set(sopt, in6p->in6p_icmp6filt,
|
|
|
|
sizeof(struct icmp6_filter));
|
2001-02-10 07:14:26 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
default:
|
|
|
|
error = ENOPROTOOPT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
2018-04-14 17:59:58 +03:00
|
|
|
return error;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Perform rate limit check.
|
|
|
|
* Returns 0 if it is okay to send the icmp6 packet.
|
|
|
|
* Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
|
|
|
|
* limitation.
|
|
|
|
*
|
|
|
|
* XXX per-destination/type check necessary?
|
|
|
|
*/
|
|
|
|
static int
|
2006-10-12 05:30:41 +04:00
|
|
|
icmp6_ratelimit(
|
2006-11-16 04:32:37 +03:00
|
|
|
const struct in6_addr *dst, /* not used at this moment */
|
|
|
|
const int type, /* not used at this moment */
|
|
|
|
const int code) /* not used at this moment */
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2000-07-06 16:36:18 +04:00
|
|
|
int ret;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2001-10-15 15:12:44 +04:00
|
|
|
ret = 0; /* okay to send */
|
2000-07-06 16:36:18 +04:00
|
|
|
|
|
|
|
/* PPS limit */
|
2000-07-09 10:44:57 +04:00
|
|
|
if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
|
|
|
|
icmp6errppslim)) {
|
|
|
|
/* The packet is subject to rate limit */
|
2000-07-06 16:36:18 +04:00
|
|
|
ret++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
|
|
|
|
1999-07-22 16:56:56 +04:00
|
|
|
static struct rtentry *
|
2007-05-23 21:14:59 +04:00
|
|
|
icmp6_mtudisc_clone(struct sockaddr *dst)
|
1999-07-22 16:56:56 +04:00
|
|
|
{
|
|
|
|
struct rtentry *rt;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
rt = rtalloc1(dst, 1);
|
2018-04-14 17:59:58 +03:00
|
|
|
if (rt == NULL)
|
1999-07-22 16:56:56 +04:00
|
|
|
return NULL;
|
2000-05-09 15:51:12 +04:00
|
|
|
|
1999-07-22 16:56:56 +04:00
|
|
|
/* If we didn't get a host route, allocate one */
|
|
|
|
if ((rt->rt_flags & RTF_HOST) == 0) {
|
|
|
|
struct rtentry *nrt;
|
|
|
|
|
2016-04-01 12:16:02 +03:00
|
|
|
error = rtrequest(RTM_ADD, dst, rt->rt_gateway, NULL,
|
1999-07-22 16:56:56 +04:00
|
|
|
RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
|
|
|
|
if (error) {
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such rtaloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a h
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
1999-07-22 16:56:56 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
nrt->rt_rmx = rt->rt_rmx;
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such rtaloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a h
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
1999-07-22 16:56:56 +04:00
|
|
|
rt = nrt;
|
|
|
|
}
|
2017-02-13 10:18:20 +03:00
|
|
|
|
|
|
|
mutex_enter(&icmp6_mtx);
|
1999-07-22 16:56:56 +04:00
|
|
|
error = rt_timer_add(rt, icmp6_mtudisc_timeout,
|
|
|
|
icmp6_mtudisc_timeout_q);
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_exit(&icmp6_mtx);
|
|
|
|
|
1999-07-22 16:56:56 +04:00
|
|
|
if (error) {
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such rtaloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequst{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequst{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a h
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
1999-07-22 16:56:56 +04:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rt; /* caller need to call rtfree() */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-11-16 04:32:37 +03:00
|
|
|
icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
|
1999-07-22 16:56:56 +04:00
|
|
|
{
|
2018-06-01 10:13:35 +03:00
|
|
|
struct rtentry *retrt;
|
2015-08-31 09:25:15 +03:00
|
|
|
|
|
|
|
KASSERT(rt != NULL);
|
|
|
|
rt_assert_referenced(rt);
|
|
|
|
|
2000-05-09 15:51:12 +04:00
|
|
|
if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
|
1999-07-22 16:56:56 +04:00
|
|
|
(RTF_DYNAMIC | RTF_HOST)) {
|
2016-04-01 12:16:02 +03:00
|
|
|
rtrequest(RTM_DELETE, rt_getkey(rt),
|
2018-06-01 10:13:35 +03:00
|
|
|
rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt);
|
|
|
|
rt_unref(rt);
|
|
|
|
rt_free(retrt);
|
1999-07-22 16:56:56 +04:00
|
|
|
} else {
|
2002-05-29 10:55:48 +04:00
|
|
|
if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
|
|
|
|
rt->rt_rmx.rmx_mtu = 0;
|
1999-07-22 16:56:56 +04:00
|
|
|
}
|
|
|
|
}
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2001-02-08 19:07:39 +03:00
|
|
|
static void
|
2006-11-16 04:32:37 +03:00
|
|
|
icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
|
2001-02-08 19:07:39 +03:00
|
|
|
{
|
2018-06-01 10:13:35 +03:00
|
|
|
struct rtentry *retrt;
|
2015-08-31 09:25:15 +03:00
|
|
|
|
|
|
|
KASSERT(rt != NULL);
|
|
|
|
rt_assert_referenced(rt);
|
|
|
|
|
2001-02-08 19:07:39 +03:00
|
|
|
if ((rt->rt_flags & (RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) ==
|
|
|
|
(RTF_GATEWAY | RTF_DYNAMIC | RTF_HOST)) {
|
2016-04-01 12:16:02 +03:00
|
|
|
rtrequest(RTM_DELETE, rt_getkey(rt),
|
2018-06-01 10:13:35 +03:00
|
|
|
rt->rt_gateway, rt_mask(rt), rt->rt_flags, &retrt);
|
|
|
|
rt_unref(rt);
|
|
|
|
rt_free(retrt);
|
2001-02-08 19:07:39 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-12 14:04:44 +03:00
|
|
|
#ifdef COMPAT_90
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
/*
|
|
|
|
* sysctl helper routine for the net.inet6.icmp6.nd6 nodes. silly?
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
sysctl_net_inet6_icmp6_nd6(SYSCTLFN_ARGS)
|
1999-06-28 10:36:47 +04:00
|
|
|
{
|
2006-10-12 05:30:41 +04:00
|
|
|
(void)&name;
|
|
|
|
(void)&l;
|
|
|
|
(void)&oname;
|
1999-06-28 10:36:47 +04:00
|
|
|
|
2003-12-17 21:49:38 +03:00
|
|
|
if (namelen != 0)
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
return (EINVAL);
|
|
|
|
|
2003-12-17 21:49:38 +03:00
|
|
|
return (nd6_sysctl(rnode->sysctl_num, oldp, oldlenp,
|
2005-05-30 01:43:51 +04:00
|
|
|
/*XXXUNCONST*/
|
|
|
|
__UNCONST(newp), newlen));
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
}
|
2020-06-12 14:04:44 +03:00
|
|
|
#endif
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
|
2008-04-15 07:57:04 +04:00
|
|
|
static int
|
|
|
|
sysctl_net_inet6_icmp6_stats(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
|
2008-05-04 11:22:14 +04:00
|
|
|
return (NETSTAT_SYSCTL(icmp6stat_percpu, ICMP6_NSTATS));
|
2008-04-15 07:57:04 +04:00
|
|
|
}
|
|
|
|
|
2015-09-14 08:34:28 +03:00
|
|
|
static int
|
|
|
|
sysctl_net_inet6_icmp6_redirtimeout(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
int error, tmp;
|
|
|
|
struct sysctlnode node;
|
|
|
|
|
2017-02-13 10:18:20 +03:00
|
|
|
mutex_enter(&icmp6_mtx);
|
|
|
|
|
2015-09-14 08:34:28 +03:00
|
|
|
node = *rnode;
|
|
|
|
node.sysctl_data = &tmp;
|
|
|
|
tmp = icmp6_redirtimeout;
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
if (error || newp == NULL)
|
2017-02-13 10:18:20 +03:00
|
|
|
goto out;
|
|
|
|
if (tmp < 0) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2015-09-14 08:34:28 +03:00
|
|
|
icmp6_redirtimeout = tmp;
|
|
|
|
|
|
|
|
if (icmp6_redirect_timeout_q != NULL) {
|
|
|
|
if (icmp6_redirtimeout == 0) {
|
2016-10-25 05:45:09 +03:00
|
|
|
rt_timer_queue_destroy(icmp6_redirect_timeout_q);
|
2015-09-14 08:34:28 +03:00
|
|
|
} else {
|
|
|
|
rt_timer_queue_change(icmp6_redirect_timeout_q,
|
|
|
|
icmp6_redirtimeout);
|
|
|
|
}
|
|
|
|
} else if (icmp6_redirtimeout > 0) {
|
|
|
|
icmp6_redirect_timeout_q =
|
|
|
|
rt_timer_queue_create(icmp6_redirtimeout);
|
|
|
|
}
|
2017-02-13 10:18:20 +03:00
|
|
|
error = 0;
|
|
|
|
out:
|
|
|
|
mutex_exit(&icmp6_mtx);
|
|
|
|
return error;
|
2015-09-14 08:34:28 +03:00
|
|
|
}
|
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
static void
|
|
|
|
sysctl_net_inet6_icmp6_setup(struct sysctllog **clog)
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
{
|
|
|
|
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTLTYPE_NODE, "inet6", NULL,
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET6, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_NODE, "icmp6",
|
|
|
|
SYSCTL_DESCR("ICMPv6 related settings"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6, CTL_EOL);
|
|
|
|
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_STRUCT, "stats",
|
|
|
|
SYSCTL_DESCR("ICMPv6 transmission statistics"),
|
2008-04-15 07:57:04 +04:00
|
|
|
sysctl_net_inet6_icmp6_stats, 0, NULL, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_STATS, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "rediraccept",
|
|
|
|
SYSCTL_DESCR("Accept and process redirect messages"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &icmp6_rediraccept, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_REDIRACCEPT, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "redirtimeout",
|
|
|
|
SYSCTL_DESCR("Redirect generated route lifetime"),
|
2015-09-14 08:34:28 +03:00
|
|
|
sysctl_net_inet6_icmp6_redirtimeout, 0,
|
|
|
|
&icmp6_redirtimeout, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_REDIRTIMEOUT, CTL_EOL);
|
|
|
|
#if 0 /* obsoleted */
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTLTYPE_INT, "errratelimit", NULL,
|
|
|
|
NULL, 0, &icmp6_errratelimit, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ERRRATELIMIT, CTL_EOL);
|
|
|
|
#endif
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "nd6_prune",
|
|
|
|
SYSCTL_DESCR("Neighbor discovery prune interval"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &nd6_prune, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_PRUNE, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "nd6_delay",
|
|
|
|
SYSCTL_DESCR("First probe delay time"),
|
2020-09-11 18:03:33 +03:00
|
|
|
NULL, 0, &nd6_nd_domain.nd_delay, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_DELAY, CTL_EOL);
|
2020-09-11 18:03:33 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "nd6_mmaxtries",
|
|
|
|
SYSCTL_DESCR("Number of multicast discovery attempts"),
|
|
|
|
NULL, 0, &nd6_nd_domain.nd_mmaxtries, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_MMAXTRIES, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "nd6_umaxtries",
|
|
|
|
SYSCTL_DESCR("Number of unicast discovery attempts"),
|
2020-09-11 18:03:33 +03:00
|
|
|
NULL, 0, &nd6_nd_domain.nd_umaxtries, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_UMAXTRIES, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2020-09-11 18:03:33 +03:00
|
|
|
CTLTYPE_INT, "nd6_maxnudhint",
|
|
|
|
SYSCTL_DESCR("Maximum neighbor unreachable hint count"),
|
|
|
|
NULL, 0, &nd6_nd_domain.nd_maxnudhint, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
2020-09-11 18:03:33 +03:00
|
|
|
ICMPV6CTL_ND6_MAXNUDHINT, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "maxqueuelen",
|
|
|
|
SYSCTL_DESCR("max packet queue len for a unresolved ND"),
|
|
|
|
NULL, 1, &nd6_nd_domain.nd_maxqueuelen, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_MAXQLEN, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "nd6_useloopback",
|
|
|
|
SYSCTL_DESCR("Use loopback interface for local traffic"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &nd6_useloopback, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_USELOOPBACK, CTL_EOL);
|
|
|
|
#if 0 /* obsoleted */
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTLTYPE_INT, "nd6_proxyall", NULL,
|
|
|
|
NULL, 0, &nd6_proxyall, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_PROXYALL, CTL_EOL);
|
|
|
|
#endif
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "nodeinfo",
|
|
|
|
SYSCTL_DESCR("Respond to node information requests"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &icmp6_nodeinfo, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_NODEINFO, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "errppslimit",
|
|
|
|
SYSCTL_DESCR("Maximum ICMP errors sent per second"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &icmp6errppslim, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ERRPPSLIMIT, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "mtudisc_hiwat",
|
|
|
|
SYSCTL_DESCR("Low mark on MTU Discovery route timers"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &icmp6_mtudisc_hiwat, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_MTUDISC_HIWAT, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "mtudisc_lowat",
|
|
|
|
SYSCTL_DESCR("Low mark on MTU Discovery route timers"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &icmp6_mtudisc_lowat, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_MTUDISC_LOWAT, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "nd6_debug",
|
|
|
|
SYSCTL_DESCR("Enable neighbor discovery debug output"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &nd6_debug, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
|
|
|
ICMPV6CTL_ND6_DEBUG, CTL_EOL);
|
2020-06-12 14:04:44 +03:00
|
|
|
#ifdef COMPAT_90
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_STRUCT, "nd6_drlist",
|
|
|
|
SYSCTL_DESCR("Default router list"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
sysctl_net_inet6_icmp6_nd6, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
2020-06-12 14:04:44 +03:00
|
|
|
OICMPV6CTL_ND6_DRLIST, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_STRUCT, "nd6_prlist",
|
|
|
|
SYSCTL_DESCR("Prefix list"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
sysctl_net_inet6_icmp6_nd6, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET6, IPPROTO_ICMPV6,
|
2020-06-12 14:04:44 +03:00
|
|
|
OICMPV6CTL_ND6_PRLIST, CTL_EOL);
|
|
|
|
#endif
|
1999-06-28 10:36:47 +04:00
|
|
|
}
|
2008-04-15 07:57:04 +04:00
|
|
|
|
|
|
|
/*
 * icmp6_statinc: increment one ICMPv6 statistics counter.
 *
 * stat must be a valid index below ICMP6_NSTATS (asserted below);
 * the increment itself is performed by the ICMP6_STATINC() macro.
 */
void
icmp6_statinc(u_int stat)
{

	/* Reject out-of-range counter indexes on DIAGNOSTIC kernels. */
	KASSERT(stat < ICMP6_NSTATS);
	ICMP6_STATINC(stat);
}
|