/* $NetBSD: in.c,v 1.247 2022/11/25 08:39:32 knakahara Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Public Access Networks Corporation ("Panix"). It was developed under
* contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in.c 8.4 (Berkeley) 1/9/95
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.247 2022/11/25 08:39:32 knakahara Exp $");
#include "arp.h"
#ifdef _KERNEL_OPT
#include "opt_inet.h"
#include "opt_inet_conf.h"
#include "opt_mrouting.h"
#include "opt_net_mpsafe.h"
#endif
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/cprng.h>
#include <net/if.h>
#include <net/route.h>
#include <net/pfil.h>
#include <net/if_arp.h>
#include <net/if_ether.h>
#include <net/if_types.h>
#include <net/if_llatbl.h>
#include <net/if_dl.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_ifattach.h>
#include <netinet/in_pcb.h>
#include <netinet/in_selsrc.h>
#include <netinet/if_inarp.h>
#include <netinet/ip_mroute.h>
#include <netinet/igmp_var.h>
#ifdef IPSELSRC
#include <netinet/in_selsrc.h>
#endif
static u_int in_mask2len(struct in_addr *);
static int in_lifaddr_ioctl(struct socket *, u_long, void *,
struct ifnet *);
static void in_addrhash_insert_locked(struct in_ifaddr *);
static void in_addrhash_remove_locked(struct in_ifaddr *);
static int in_addprefix(struct in_ifaddr *, int);
static void in_scrubaddr(struct in_ifaddr *);
static int in_scrubprefix(struct in_ifaddr *);
static void in_sysctl_init(struct sysctllog **);
#ifndef SUBNETSARELOCAL
#define SUBNETSARELOCAL 1
#endif
#ifndef HOSTZEROBROADCAST
#define HOSTZEROBROADCAST 0
#endif
/* Note: 61, 127, 251, 509, 1021, 2039 are good. */
#ifndef IN_MULTI_HASH_SIZE
#define IN_MULTI_HASH_SIZE 509
#endif
static int subnetsarelocal = SUBNETSARELOCAL;
static int hostzeroisbroadcast = HOSTZEROBROADCAST;
/*
* This list is used to keep track of in_multi chains which belong to
* deleted interface addresses. We use in_ifaddr so that a chain head
* won't be deallocated until all multicast address records are deleted.
*/
LIST_HEAD(in_multihashhead, in_multi); /* Type of the hash head */
static struct pool inmulti_pool;
static u_int in_multientries;
static struct in_multihashhead *in_multihashtbl;
static u_long in_multihash;
static krwlock_t in_multilock;
#define IN_MULTI_HASH(x, ifp) \
(in_multihashtbl[(u_long)((x) ^ (ifp->if_index)) % IN_MULTI_HASH_SIZE])
/* XXX DEPRECATED. Keep them to avoid breaking kvm(3) users. */
struct in_ifaddrhashhead * in_ifaddrhashtbl;
u_long in_ifaddrhash;
struct in_ifaddrhead in_ifaddrhead;
static kmutex_t in_ifaddr_lock;
pserialize_t in_ifaddrhash_psz;
struct pslist_head * in_ifaddrhashtbl_pslist;
u_long in_ifaddrhash_pslist;
struct pslist_head in_ifaddrhead_pslist;
void
in_init(void)
{
pool_init(&inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl",
NULL, IPL_SOFTNET);
TAILQ_INIT(&in_ifaddrhead);
PSLIST_INIT(&in_ifaddrhead_pslist);
in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
&in_ifaddrhash);
in_ifaddrhash_psz = pserialize_create();
in_ifaddrhashtbl_pslist = hashinit(IN_IFADDR_HASH_SIZE, HASH_PSLIST,
true, &in_ifaddrhash_pslist);
mutex_init(&in_ifaddr_lock, MUTEX_DEFAULT, IPL_NONE);
in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true,
&in_multihash);
rw_init(&in_multilock);
in_sysctl_init(NULL);
}
/*
* Return 1 if an internet address is for a ``local'' host
* (one to which we have a connection). If subnetsarelocal
* is true, this includes other subnets of the local net.
* Otherwise, it includes only the directly-connected (sub)nets.
*/
int
in_localaddr(struct in_addr in)
{
struct in_ifaddr *ia;
int localaddr = 0;
int s = pserialize_read_enter();
if (subnetsarelocal) {
IN_ADDRLIST_READER_FOREACH(ia) {
if ((in.s_addr & ia->ia_netmask) == ia->ia_net) {
localaddr = 1;
break;
}
}
} else {
IN_ADDRLIST_READER_FOREACH(ia) {
if ((in.s_addr & ia->ia_subnetmask) == ia->ia_subnet) {
localaddr = 1;
break;
}
}
}
pserialize_read_exit(s);
return localaddr;
}
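/*
 * Illustrative example (editor's addition, not part of the original
 * source): with 192.0.2.5/28 configured on some interface,
 * in_localaddr(192.0.2.200) returns 1 only while subnetsarelocal is
 * set, because .200 shares the classful net 192.0.2.0/24 but not the
 * /28 subnet of the assigned address.
 */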
/*
* like in_localaddr() but can specify ifp.
*/
int
in_direct(struct in_addr in, struct ifnet *ifp)
{
struct ifaddr *ifa;
int localaddr = 0;
int s;
KASSERT(ifp != NULL);
#define ia (ifatoia(ifa))
s = pserialize_read_enter();
if (subnetsarelocal) {
IFADDR_READER_FOREACH(ifa, ifp) {
if (ifa->ifa_addr->sa_family == AF_INET &&
((in.s_addr & ia->ia_netmask) == ia->ia_net)) {
localaddr = 1;
break;
}
}
} else {
IFADDR_READER_FOREACH(ifa, ifp) {
if (ifa->ifa_addr->sa_family == AF_INET &&
(in.s_addr & ia->ia_subnetmask) == ia->ia_subnet) {
localaddr = 1;
break;
}
}
}
pserialize_read_exit(s);
return localaddr;
#undef ia
}
/*
* Determine whether an IP address is in a reserved set of addresses
* that may not be forwarded, or whether datagrams to that destination
* may be forwarded.
*/
int
in_canforward(struct in_addr in)
{
u_int32_t net;
if (IN_EXPERIMENTAL(in.s_addr) || IN_MULTICAST(in.s_addr))
return (0);
if (IN_CLASSA(in.s_addr)) {
net = in.s_addr & IN_CLASSA_NET;
if (net == 0 || net == htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
return (0);
}
return (1);
}
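/*
 * Examples (editor's illustration): in_canforward() rejects class D
 * multicast (224.0.0.0/4), class E experimental (240.0.0.0/4), net 0
 * and the loopback net 127/8; a plain unicast destination such as
 * 192.0.2.1 is considered forwardable.
 */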
/*
* Trim a mask in a sockaddr
*/
void
in_socktrim(struct sockaddr_in *ap)
{
char *cplim = (char *) &ap->sin_addr;
char *cp = (char *) (&ap->sin_addr + 1);
ap->sin_len = 0;
while (--cp >= cplim)
if (*cp) {
(ap)->sin_len = cp - (char *) (ap) + 1;
break;
}
}
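/*
 * Editor's note (illustrative, assumes the standard sockaddr_in
 * layout with sin_addr at offset 4): for a mask of 255.255.255.0 the
 * last non-zero byte of sin_addr is at offset 6, so in_socktrim()
 * sets sin_len to 7; an all-zero mask leaves sin_len at 0.
 */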
/*
* Maintain the "in_maxmtu" variable, which is the largest
* mtu for non-local interfaces with AF_INET addresses assigned
* to them that are up.
*/
unsigned long in_maxmtu;
void
in_setmaxmtu(void)
{
struct in_ifaddr *ia;
struct ifnet *ifp;
unsigned long maxmtu = 0;
int s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(ia) {
if ((ifp = ia->ia_ifp) == 0)
continue;
if ((ifp->if_flags & (IFF_UP|IFF_LOOPBACK)) != IFF_UP)
continue;
if (ifp->if_mtu > maxmtu)
maxmtu = ifp->if_mtu;
}
if (maxmtu)
in_maxmtu = maxmtu;
pserialize_read_exit(s);
}
static u_int
in_mask2len(struct in_addr *mask)
{
u_int x, y;
u_char *p;
p = (u_char *)mask;
for (x = 0; x < sizeof(*mask); x++) {
if (p[x] != 0xff)
break;
}
y = 0;
if (x < sizeof(*mask)) {
for (y = 0; y < NBBY; y++) {
if ((p[x] & (0x80 >> y)) == 0)
break;
}
}
return x * NBBY + y;
}
void
in_len2mask(struct in_addr *mask, u_int len)
{
u_int i;
u_char *p;
p = (u_char *)mask;
memset(mask, 0, sizeof(*mask));
for (i = 0; i < len / NBBY; i++)
p[i] = 0xff;
if (len % NBBY)
p[i] = (0xff00 >> (len % NBBY)) & 0xff;
}
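/*
 * Illustrative round trip (editor's addition), assuming a local
 * "struct in_addr m":
 *
 *	in_len2mask(&m, 24);	m becomes 255.255.255.0
 *	in_mask2len(&m);	returns 24
 *
 * A non-contiguous mask is measured up to its first hole, e.g.
 * 255.0.255.0 yields a length of 8.
 */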
/*
* Generic internet control operations (ioctl's).
* Ifp is 0 if not an interface-specific ioctl.
*/
/* ARGSUSED */
static int
in_control0(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
{
struct ifreq *ifr = (struct ifreq *)data;
struct in_ifaddr *ia = NULL;
struct in_aliasreq *ifra = (struct in_aliasreq *)data;
struct sockaddr_in oldaddr, *new_dstaddr;
int error, hostIsNew, maskIsNew;
int newifaddr = 0;
bool run_hook = false;
bool need_reinsert = false;
struct psref psref;
int bound;
switch (cmd) {
case SIOCALIFADDR:
case SIOCDLIFADDR:
case SIOCGLIFADDR:
if (ifp == NULL)
return EINVAL;
return in_lifaddr_ioctl(so, cmd, data, ifp);
case SIOCGIFADDRPREF:
case SIOCSIFADDRPREF:
if (ifp == NULL)
return EINVAL;
return ifaddrpref_ioctl(so, cmd, data, ifp);
#if NARP > 0
case SIOCGNBRINFO:
{
struct in_nbrinfo *nbi = (struct in_nbrinfo *)data;
struct llentry *ln;
struct in_addr nb_addr = nbi->addr; /* make local for safety */
ln = arplookup(ifp, &nb_addr, NULL, 0);
if (ln == NULL)
return EINVAL;
nbi->state = ln->ln_state;
nbi->asked = ln->ln_asked;
nbi->expire = ln->ln_expire ?
time_mono_to_wall(ln->ln_expire) : 0;
LLE_RUNLOCK(ln);
return 0;
}
#endif
}
bound = curlwp_bind();
/*
* Find address for this interface, if it exists.
*/
if (ifp != NULL)
ia = in_get_ia_from_ifp_psref(ifp, &psref);
hostIsNew = 1; /* moved here to appease gcc */
switch (cmd) {
case SIOCAIFADDR:
case SIOCDIFADDR:
case SIOCGIFALIAS:
case SIOCGIFAFLAG_IN:
if (ifra->ifra_addr.sin_family == AF_INET) {
int s;
if (ia != NULL)
ia4_release(ia, &psref);
s = pserialize_read_enter();
IN_ADDRHASH_READER_FOREACH(ia,
ifra->ifra_addr.sin_addr.s_addr) {
if (ia->ia_ifp == ifp &&
in_hosteq(ia->ia_addr.sin_addr,
ifra->ifra_addr.sin_addr))
break;
}
if (ia != NULL)
ia4_acquire(ia, &psref);
pserialize_read_exit(s);
}
if ((cmd == SIOCDIFADDR ||
cmd == SIOCGIFALIAS ||
cmd == SIOCGIFAFLAG_IN) &&
ia == NULL) {
error = EADDRNOTAVAIL;
goto out;
}
if (cmd == SIOCDIFADDR &&
ifra->ifra_addr.sin_family == AF_UNSPEC) {
ifra->ifra_addr.sin_family = AF_INET;
}
/* FALLTHROUGH */
case SIOCSIFADDR:
if (ia == NULL || ia->ia_addr.sin_family != AF_INET)
;
else if (ifra->ifra_addr.sin_len == 0) {
ifra->ifra_addr = ia->ia_addr;
hostIsNew = 0;
} else if (in_hosteq(ia->ia_addr.sin_addr,
ifra->ifra_addr.sin_addr))
hostIsNew = 0;
if (ifra->ifra_addr.sin_family != AF_INET) {
error = EAFNOSUPPORT;
goto out;
}
/* FALLTHROUGH */
case SIOCSIFDSTADDR:
if (cmd == SIOCSIFDSTADDR &&
ifreq_getaddr(cmd, ifr)->sa_family != AF_INET) {
error = EAFNOSUPPORT;
goto out;
}
/* FALLTHROUGH */
case SIOCSIFNETMASK:
if (ifp == NULL)
panic("in_control");
if (cmd == SIOCGIFALIAS || cmd == SIOCGIFAFLAG_IN)
break;
if (ia == NULL &&
(cmd == SIOCSIFNETMASK || cmd == SIOCSIFDSTADDR)) {
error = EADDRNOTAVAIL;
goto out;
}
if (kauth_authorize_network(kauth_cred_get(),
KAUTH_NETWORK_INTERFACE,
KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
NULL) != 0) {
error = EPERM;
goto out;
}
if (ia == NULL) {
ia = malloc(sizeof(*ia), M_IFADDR, M_WAITOK|M_ZERO);
if (ia == NULL) {
error = ENOBUFS;
goto out;
}
ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
ia->ia_ifa.ifa_netmask = sintosa(&ia->ia_sockmask);
#ifdef IPSELSRC
ia->ia_ifa.ifa_getifa = in_getifa;
#else /* IPSELSRC */
ia->ia_ifa.ifa_getifa = NULL;
#endif /* IPSELSRC */
ia->ia_sockmask.sin_len = 8;
ia->ia_sockmask.sin_family = AF_INET;
if (ifp->if_flags & IFF_BROADCAST) {
ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
ia->ia_broadaddr.sin_family = AF_INET;
}
ia->ia_ifp = ifp;
ia->ia_idsalt = cprng_fast32() % 65535;
LIST_INIT(&ia->ia_multiaddrs);
IN_ADDRHASH_ENTRY_INIT(ia);
IN_ADDRLIST_ENTRY_INIT(ia);
ifa_psref_init(&ia->ia_ifa);
/*
* We need a reference to make ia survive over in_ifinit
* that does ifaref and ifafree.
*/
ifaref(&ia->ia_ifa);
newifaddr = 1;
}
break;
case SIOCSIFBRDADDR:
if (kauth_authorize_network(kauth_cred_get(),
KAUTH_NETWORK_INTERFACE,
KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
NULL) != 0) {
error = EPERM;
goto out;
}
/* FALLTHROUGH */
case SIOCGIFADDR:
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCGIFBRDADDR:
if (ia == NULL) {
error = EADDRNOTAVAIL;
goto out;
}
break;
}
error = 0;
switch (cmd) {
case SIOCGIFADDR:
ifreq_setaddr(cmd, ifr, sintocsa(&ia->ia_addr));
break;
case SIOCGIFBRDADDR:
if ((ifp->if_flags & IFF_BROADCAST) == 0) {
error = EINVAL;
goto out;
}
ifreq_setdstaddr(cmd, ifr, sintocsa(&ia->ia_broadaddr));
break;
case SIOCGIFDSTADDR:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
error = EINVAL;
goto out;
}
ifreq_setdstaddr(cmd, ifr, sintocsa(&ia->ia_dstaddr));
break;
case SIOCGIFNETMASK:
/*
* We keep the number of trailing zero bytes in the sin_len field
* of ia_sockmask, so we fix this before we pass it back to
* userland.
*/
oldaddr = ia->ia_sockmask;
oldaddr.sin_len = sizeof(struct sockaddr_in);
ifreq_setaddr(cmd, ifr, (const void *)&oldaddr);
break;
case SIOCSIFDSTADDR:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
error = EINVAL;
goto out;
}
oldaddr = ia->ia_dstaddr;
ia->ia_dstaddr = *satocsin(ifreq_getdstaddr(cmd, ifr));
if ((error = if_addr_init(ifp, &ia->ia_ifa, false)) != 0) {
ia->ia_dstaddr = oldaddr;
goto out;
}
if (ia->ia_flags & IFA_ROUTE) {
ia->ia_ifa.ifa_dstaddr = sintosa(&oldaddr);
rtinit(&ia->ia_ifa, RTM_DELETE, RTF_HOST);
ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
rtinit(&ia->ia_ifa, RTM_ADD, RTF_HOST|RTF_UP);
}
break;
case SIOCSIFBRDADDR:
if ((ifp->if_flags & IFF_BROADCAST) == 0) {
error = EINVAL;
goto out;
}
ia->ia_broadaddr = *satocsin(ifreq_getbroadaddr(cmd, ifr));
break;
case SIOCSIFADDR:
if (!newifaddr) {
in_addrhash_remove(ia);
need_reinsert = true;
}
error = in_ifinit(ifp, ia, satocsin(ifreq_getaddr(cmd, ifr)),
NULL, 1);
run_hook = true;
break;
case SIOCSIFNETMASK:
in_scrubprefix(ia);
ia->ia_sockmask = *satocsin(ifreq_getaddr(cmd, ifr));
ia->ia_subnetmask = ia->ia_sockmask.sin_addr.s_addr;
if (!newifaddr) {
in_addrhash_remove(ia);
need_reinsert = true;
}
error = in_ifinit(ifp, ia, NULL, NULL, 0);
break;
case SIOCAIFADDR:
maskIsNew = 0;
if (ifra->ifra_mask.sin_len) {
in_scrubprefix(ia);
ia->ia_sockmask = ifra->ifra_mask;
ia->ia_subnetmask = ia->ia_sockmask.sin_addr.s_addr;
maskIsNew = 1;
}
if ((ifp->if_flags & IFF_POINTOPOINT) &&
(ifra->ifra_dstaddr.sin_family == AF_INET)) {
new_dstaddr = &ifra->ifra_dstaddr;
maskIsNew = 1; /* We lie; but the effect's the same */
} else
new_dstaddr = NULL;
if (ifra->ifra_addr.sin_family == AF_INET &&
(hostIsNew || maskIsNew)) {
if (!newifaddr) {
in_addrhash_remove(ia);
need_reinsert = true;
}
error = in_ifinit(ifp, ia, &ifra->ifra_addr,
new_dstaddr, 0);
}
if ((ifp->if_flags & IFF_BROADCAST) &&
(ifra->ifra_broadaddr.sin_family == AF_INET))
ia->ia_broadaddr = ifra->ifra_broadaddr;
run_hook = true;
break;
case SIOCGIFALIAS:
ifra->ifra_mask = ia->ia_sockmask;
if ((ifp->if_flags & IFF_POINTOPOINT) &&
(ia->ia_dstaddr.sin_family == AF_INET))
ifra->ifra_dstaddr = ia->ia_dstaddr;
else if ((ifp->if_flags & IFF_BROADCAST) &&
(ia->ia_broadaddr.sin_family == AF_INET))
ifra->ifra_broadaddr = ia->ia_broadaddr;
else
memset(&ifra->ifra_broadaddr, 0,
sizeof(ifra->ifra_broadaddr));
break;
case SIOCGIFAFLAG_IN:
ifr->ifr_addrflags = ia->ia4_flags;
break;
case SIOCDIFADDR:
ia4_release(ia, &psref);
ifaref(&ia->ia_ifa);
in_purgeaddr(&ia->ia_ifa);
pfil_run_addrhooks(if_pfil, cmd, &ia->ia_ifa);
ifafree(&ia->ia_ifa);
ia = NULL;
break;
#ifdef MROUTING
case SIOCGETVIFCNT:
case SIOCGETSGCNT:
error = mrt_ioctl(so, cmd, data);
break;
#endif /* MROUTING */
default:
error = ENOTTY;
goto out;
}
/*
* XXX insert regardless of error to make in_purgeaddr below work.
* Need to improve.
*/
if (newifaddr) {
ifaref(&ia->ia_ifa);
ifa_insert(ifp, &ia->ia_ifa);
mutex_enter(&in_ifaddr_lock);
TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_list);
IN_ADDRLIST_WRITER_INSERT_TAIL(ia);
in_addrhash_insert_locked(ia);
/* Release a reference that is held just after creation. */
ifafree(&ia->ia_ifa);
mutex_exit(&in_ifaddr_lock);
} else if (need_reinsert) {
in_addrhash_insert(ia);
}
if (error == 0) {
if (run_hook)
pfil_run_addrhooks(if_pfil, cmd, &ia->ia_ifa);
} else if (newifaddr) {
KASSERT(ia != NULL);
in_purgeaddr(&ia->ia_ifa);
ia = NULL;
}
out:
if (!newifaddr && ia != NULL)
ia4_release(ia, &psref);
curlwp_bindx(bound);
return error;
}
int
in_control(struct socket *so, u_long cmd, void *data, struct ifnet *ifp)
{
int error;
#ifndef NET_MPSAFE
KASSERT(KERNEL_LOCKED_P());
#endif
error = in_control0(so, cmd, data, ifp);
return error;
}
/* Add ownaddr as loopback rtentry. */
static void
in_ifaddlocal(struct ifaddr *ifa)
{
struct in_ifaddr *ia;
ia = (struct in_ifaddr *)ifa;
if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED)) {
rt_addrmsg(RTM_NEWADDR, ifa);
return;
}
if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY ||
(ia->ia_ifp->if_flags & IFF_POINTOPOINT &&
in_hosteq(ia->ia_dstaddr.sin_addr, ia->ia_addr.sin_addr)))
{
rt_addrmsg(RTM_NEWADDR, ifa);
return;
}
rt_ifa_addlocal(ifa);
}
/* Remove loopback entry of ownaddr */
static void
in_ifremlocal(struct ifaddr *ifa)
{
struct in_ifaddr *ia, *p;
struct ifaddr *alt_ifa = NULL;
int ia_count = 0;
int s;
struct psref psref;
int bound = curlwp_bind();
ia = (struct in_ifaddr *)ifa;
if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED)) {
rt_addrmsg(RTM_DELADDR, ifa);
goto out;
}
/* Delete the entry if exactly one ifaddr matches the
* address, ifa->ifa_addr. */
s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(p) {
if ((p->ia_ifp->if_flags & IFF_UNNUMBERED))
continue;
if (!in_hosteq(p->ia_addr.sin_addr, ia->ia_addr.sin_addr))
continue;
if (p->ia_ifp != ia->ia_ifp)
alt_ifa = &p->ia_ifa;
if (++ia_count > 1 && alt_ifa != NULL)
break;
}
if (alt_ifa != NULL && ia_count > 1)
ifa_acquire(alt_ifa, &psref);
pserialize_read_exit(s);
if (ia_count == 0)
goto out;
rt_ifa_remlocal(ifa, ia_count == 1 ? NULL : alt_ifa);
if (alt_ifa != NULL && ia_count > 1)
ifa_release(alt_ifa, &psref);
out:
curlwp_bindx(bound);
}
static void
in_scrubaddr(struct in_ifaddr *ia)
{
/* stop DAD processing */
if (ia->ia_dad_stop != NULL)
ia->ia_dad_stop(&ia->ia_ifa);
in_scrubprefix(ia);
in_ifremlocal(&ia->ia_ifa);
mutex_enter(&in_ifaddr_lock);
if (ia->ia_allhosts != NULL) {
in_delmulti(ia->ia_allhosts);
ia->ia_allhosts = NULL;
}
mutex_exit(&in_ifaddr_lock);
}
/*
* Depends on not being called concurrently.  This is guaranteed by
* ifa->ifa_ifp's ioctl lock.  The possible callers are in_control
* and if_purgeaddrs; the former is called via ifa->ifa_ifp's ioctl
* and the latter via ifa->ifa_ifp's if_detach.  The two are never
* executed concurrently.
*/
void
in_purgeaddr(struct ifaddr *ifa)
{
struct in_ifaddr *ia = (void *) ifa;
struct ifnet *ifp = ifa->ifa_ifp;
/* KASSERT(!ifa_held(ifa)); XXX need ifa_not_held (psref_not_held) */
ifa->ifa_flags |= IFA_DESTROYING;
in_scrubaddr(ia);
mutex_enter(&in_ifaddr_lock);
in_addrhash_remove_locked(ia);
TAILQ_REMOVE(&in_ifaddrhead, ia, ia_list);
IN_ADDRLIST_WRITER_REMOVE(ia);
ifa_remove(ifp, &ia->ia_ifa);
/* Assume ifa_remove called pserialize_perform and psref_destroy */
mutex_exit(&in_ifaddr_lock);
IN_ADDRHASH_ENTRY_DESTROY(ia);
IN_ADDRLIST_ENTRY_DESTROY(ia);
ifafree(&ia->ia_ifa);
in_setmaxmtu();
}
static void
in_addrhash_insert_locked(struct in_ifaddr *ia)
{
KASSERT(mutex_owned(&in_ifaddr_lock));
LIST_INSERT_HEAD(&IN_IFADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia,
ia_hash);
IN_ADDRHASH_ENTRY_INIT(ia);
IN_ADDRHASH_WRITER_INSERT_HEAD(ia);
}
void
in_addrhash_insert(struct in_ifaddr *ia)
{
mutex_enter(&in_ifaddr_lock);
in_addrhash_insert_locked(ia);
mutex_exit(&in_ifaddr_lock);
}
static void
in_addrhash_remove_locked(struct in_ifaddr *ia)
{
KASSERT(mutex_owned(&in_ifaddr_lock));
LIST_REMOVE(ia, ia_hash);
IN_ADDRHASH_WRITER_REMOVE(ia);
}
void
in_addrhash_remove(struct in_ifaddr *ia)
{
mutex_enter(&in_ifaddr_lock);
in_addrhash_remove_locked(ia);
#ifdef NET_MPSAFE
pserialize_perform(in_ifaddrhash_psz);
#endif
mutex_exit(&in_ifaddr_lock);
IN_ADDRHASH_ENTRY_DESTROY(ia);
}
void
in_purgeif(struct ifnet *ifp) /* MUST be called at splsoftnet() */
{
IFNET_LOCK(ifp);
if_purgeaddrs(ifp, AF_INET, in_purgeaddr);
igmp_purgeif(ifp); /* manipulates pools */
#ifdef MROUTING
ip_mrouter_detach(ifp);
#endif
IFNET_UNLOCK(ifp);
}
/*
* SIOC[GAD]LIFADDR.
* SIOCGLIFADDR: get first address. (???)
* SIOCGLIFADDR with IFLR_PREFIX:
* get first address that matches the specified prefix.
* SIOCALIFADDR: add the specified address.
* SIOCALIFADDR with IFLR_PREFIX:
* EINVAL since we can't deduce hostid part of the address.
* SIOCDLIFADDR: delete the specified address.
* SIOCDLIFADDR with IFLR_PREFIX:
* delete the first address that matches the specified prefix.
* return values:
* EINVAL on invalid parameters
* EADDRNOTAVAIL on prefix match failed/specified address not found
* other values may be returned from in_ioctl()
*/
static int
in_lifaddr_ioctl(struct socket *so, u_long cmd, void *data,
struct ifnet *ifp)
{
struct if_laddrreq *iflr = (struct if_laddrreq *)data;
struct ifaddr *ifa;
struct sockaddr *sa;
/* sanity checks */
if (data == NULL || ifp == NULL) {
panic("invalid argument to in_lifaddr_ioctl");
/*NOTREACHED*/
}
switch (cmd) {
case SIOCGLIFADDR:
/* address must be specified on GET with IFLR_PREFIX */
if ((iflr->flags & IFLR_PREFIX) == 0)
break;
/*FALLTHROUGH*/
case SIOCALIFADDR:
case SIOCDLIFADDR:
/* address must be specified on ADD and DELETE */
sa = (struct sockaddr *)&iflr->addr;
if (sa->sa_family != AF_INET)
return EINVAL;
if (sa->sa_len != sizeof(struct sockaddr_in))
return EINVAL;
/* XXX need improvement */
sa = (struct sockaddr *)&iflr->dstaddr;
if (sa->sa_family != AF_UNSPEC && sa->sa_family != AF_INET)
return EINVAL;
if (sa->sa_len != 0 && sa->sa_len != sizeof(struct sockaddr_in))
return EINVAL;
break;
default: /*shouldn't happen*/
#if 0
panic("invalid cmd to in_lifaddr_ioctl");
/*NOTREACHED*/
#else
return EOPNOTSUPP;
#endif
}
if (sizeof(struct in_addr) * NBBY < iflr->prefixlen)
return EINVAL;
switch (cmd) {
case SIOCALIFADDR:
{
struct in_aliasreq ifra;
if (iflr->flags & IFLR_PREFIX)
return EINVAL;
/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
memset(&ifra, 0, sizeof(ifra));
memcpy(ifra.ifra_name, iflr->iflr_name,
sizeof(ifra.ifra_name));
memcpy(&ifra.ifra_addr, &iflr->addr,
((struct sockaddr *)&iflr->addr)->sa_len);
if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /*XXX*/
memcpy(&ifra.ifra_dstaddr, &iflr->dstaddr,
((struct sockaddr *)&iflr->dstaddr)->sa_len);
}
ifra.ifra_mask.sin_family = AF_INET;
ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);
return in_control(so, SIOCAIFADDR, &ifra, ifp);
}
case SIOCGLIFADDR:
case SIOCDLIFADDR:
{
struct in_ifaddr *ia;
struct in_addr mask, candidate, match;
struct sockaddr_in *sin;
int cmp, s;
memset(&mask, 0, sizeof(mask));
memset(&match, 0, sizeof(match)); /* XXX gcc */
if (iflr->flags & IFLR_PREFIX) {
/* lookup a prefix rather than address. */
in_len2mask(&mask, iflr->prefixlen);
sin = (struct sockaddr_in *)&iflr->addr;
match.s_addr = sin->sin_addr.s_addr;
match.s_addr &= mask.s_addr;
/* if you set extra bits, that's wrong */
if (match.s_addr != sin->sin_addr.s_addr)
return EINVAL;
cmp = 1;
} else {
if (cmd == SIOCGLIFADDR) {
/* on getting an address, take the 1st match */
cmp = 0; /*XXX*/
} else {
/* on deleting an address, do exact match */
in_len2mask(&mask, 32);
sin = (struct sockaddr_in *)&iflr->addr;
match.s_addr = sin->sin_addr.s_addr;
cmp = 1;
}
}
s = pserialize_read_enter();
IFADDR_READER_FOREACH(ifa, ifp) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
if (cmp == 0)
break;
candidate.s_addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
candidate.s_addr &= mask.s_addr;
if (candidate.s_addr == match.s_addr)
break;
}
if (ifa == NULL) {
pserialize_read_exit(s);
return EADDRNOTAVAIL;
}
ia = (struct in_ifaddr *)ifa;
if (cmd == SIOCGLIFADDR) {
/* fill in the if_laddrreq structure */
memcpy(&iflr->addr, &ia->ia_addr, ia->ia_addr.sin_len);
if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
memcpy(&iflr->dstaddr, &ia->ia_dstaddr,
ia->ia_dstaddr.sin_len);
} else
memset(&iflr->dstaddr, 0, sizeof(iflr->dstaddr));
iflr->prefixlen =
in_mask2len(&ia->ia_sockmask.sin_addr);
iflr->flags = 0; /*XXX*/
pserialize_read_exit(s);
return 0;
} else {
struct in_aliasreq ifra;
/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
memset(&ifra, 0, sizeof(ifra));
memcpy(ifra.ifra_name, iflr->iflr_name,
sizeof(ifra.ifra_name));
memcpy(&ifra.ifra_addr, &ia->ia_addr,
ia->ia_addr.sin_len);
if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
memcpy(&ifra.ifra_dstaddr, &ia->ia_dstaddr,
ia->ia_dstaddr.sin_len);
}
memcpy(&ifra.ifra_dstaddr, &ia->ia_sockmask,
ia->ia_sockmask.sin_len);
pserialize_read_exit(s);
return in_control(so, SIOCDIFADDR, &ifra, ifp);
}
}
}
return EOPNOTSUPP; /*just for safety*/
}
/*
* Initialize an interface's internet address
* and routing table entry.
*/
int
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia,
const struct sockaddr_in *sin, const struct sockaddr_in *dst, int scrub)
{
u_int32_t i;
struct sockaddr_in oldaddr, olddst;
int s, oldflags, flags = RTF_UP, error, hostIsNew;
if (sin == NULL)
sin = &ia->ia_addr;
if (dst == NULL)
dst = &ia->ia_dstaddr;
/*
* Set up new addresses.
*/
oldaddr = ia->ia_addr;
olddst = ia->ia_dstaddr;
oldflags = ia->ia4_flags;
ia->ia_addr = *sin;
ia->ia_dstaddr = *dst;
hostIsNew = oldaddr.sin_family != AF_INET ||
!in_hosteq(ia->ia_addr.sin_addr, oldaddr.sin_addr);
if (!scrub)
scrub = oldaddr.sin_family != ia->ia_dstaddr.sin_family ||
!in_hosteq(ia->ia_dstaddr.sin_addr, olddst.sin_addr);
/*
* Configure address flags.
* We need to do this early because they may be adjusted
* by if_addr_init depending on the address.
*/
if (ia->ia4_flags & IN_IFF_DUPLICATED) {
ia->ia4_flags &= ~IN_IFF_DUPLICATED;
hostIsNew = 1;
}
if (ifp->if_link_state == LINK_STATE_DOWN) {
ia->ia4_flags |= IN_IFF_DETACHED;
ia->ia4_flags &= ~IN_IFF_TENTATIVE;
} else if (hostIsNew && if_do_dad(ifp) && ip_dad_enabled())
ia->ia4_flags |= IN_IFF_TRYTENTATIVE;
/*
* Give the interface a chance to initialize
* if this is its first address,
* and to validate the address if necessary.
*/
s = splsoftnet();
error = if_addr_init(ifp, &ia->ia_ifa, true);
splx(s);
/* Now clear the try tentative flag, its job is done. */
ia->ia4_flags &= ~IN_IFF_TRYTENTATIVE;
if (error != 0) {
ia->ia_addr = oldaddr;
ia->ia_dstaddr = olddst;
ia->ia4_flags = oldflags;
return error;
}
/*
* An interface which does not have an IPv4 address is not required
* to scrub the old address, so skip the scrub in such cases.
*/
if (oldaddr.sin_family == AF_INET && (scrub || hostIsNew)) {
int newflags = ia->ia4_flags;
ia->ia_ifa.ifa_addr = sintosa(&oldaddr);
ia->ia_ifa.ifa_dstaddr = sintosa(&olddst);
ia->ia4_flags = oldflags;
if (hostIsNew)
in_scrubaddr(ia);
else if (scrub)
in_scrubprefix(ia);
ia->ia_ifa.ifa_addr = sintosa(&ia->ia_addr);
ia->ia_ifa.ifa_dstaddr = sintosa(&ia->ia_dstaddr);
ia->ia4_flags = newflags;
}
i = ia->ia_addr.sin_addr.s_addr;
if (ifp->if_flags & IFF_POINTOPOINT)
ia->ia_netmask = INADDR_BROADCAST; /* default to /32 */
else if (IN_CLASSA(i))
ia->ia_netmask = IN_CLASSA_NET;
else if (IN_CLASSB(i))
ia->ia_netmask = IN_CLASSB_NET;
else
ia->ia_netmask = IN_CLASSC_NET;
/*
* The subnet mask usually includes at least the standard network part,
* but may be smaller in the case of supernetting.
* If it is set, we believe it.
*/
if (ia->ia_subnetmask == 0) {
ia->ia_subnetmask = ia->ia_netmask;
ia->ia_sockmask.sin_addr.s_addr = ia->ia_subnetmask;
} else
ia->ia_netmask &= ia->ia_subnetmask;
ia->ia_net = i & ia->ia_netmask;
ia->ia_subnet = i & ia->ia_subnetmask;
in_socktrim(&ia->ia_sockmask);
/* re-calculate the "in_maxmtu" value */
in_setmaxmtu();
ia->ia_ifa.ifa_metric = ifp->if_metric;
if (ifp->if_flags & IFF_BROADCAST) {
if (ia->ia_subnetmask == IN_RFC3021_MASK) {
ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
ia->ia_netbroadcast.s_addr = INADDR_BROADCAST;
} else {
ia->ia_broadaddr.sin_addr.s_addr =
ia->ia_subnet | ~ia->ia_subnetmask;
ia->ia_netbroadcast.s_addr =
ia->ia_net | ~ia->ia_netmask;
}
} else if (ifp->if_flags & IFF_LOOPBACK) {
ia->ia_dstaddr = ia->ia_addr;
flags |= RTF_HOST;
} else if (ifp->if_flags & IFF_POINTOPOINT) {
if (ia->ia_dstaddr.sin_family != AF_INET)
return (0);
flags |= RTF_HOST;
}
/* Add the local route to the address */
in_ifaddlocal(&ia->ia_ifa);
/* Add the prefix route for the address */
error = in_addprefix(ia, flags);
/*
* If the interface supports multicast, join the "all hosts"
* multicast group on that interface.
*/
mutex_enter(&in_ifaddr_lock);
if ((ifp->if_flags & IFF_MULTICAST) != 0 && ia->ia_allhosts == NULL) {
struct in_addr addr;
addr.s_addr = INADDR_ALLHOSTS_GROUP;
ia->ia_allhosts = in_addmulti(&addr, ifp);
}
mutex_exit(&in_ifaddr_lock);
if (hostIsNew &&
ia->ia4_flags & IN_IFF_TENTATIVE &&
if_do_dad(ifp))
ia->ia_dad_start((struct ifaddr *)ia);
return error;
}
#define rtinitflags(x) \
((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
? RTF_HOST : 0)
/*
* add a route to prefix ("connected route" in cisco terminology).
* does nothing if there's some interface address with the same prefix already.
*/
static int
in_addprefix(struct in_ifaddr *target, int flags)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p;
int error;
int s;
if ((flags & RTF_HOST) != 0)
prefix = target->ia_dstaddr.sin_addr;
else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
}
s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(ia) {
if (rtinitflags(ia))
p = ia->ia_dstaddr.sin_addr;
else {
p = ia->ia_addr.sin_addr;
p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
}
if (prefix.s_addr != p.s_addr)
continue;
if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED))
continue;
/*
* if we got a matching prefix route inserted by another
* interface address, we don't need to bother
*
* XXX RADIX_MPATH implications here? -dyoung
*/
if (ia->ia_flags & IFA_ROUTE) {
pserialize_read_exit(s);
return 0;
}
}
pserialize_read_exit(s);
/*
* no one seems to have the prefix route; insert it.
*/
if (target->ia_ifa.ifa_ifp->if_flags & IFF_UNNUMBERED) {
error = 0;
} else {
error = rtinit(&target->ia_ifa, RTM_ADD, flags);
if (error == 0)
target->ia_flags |= IFA_ROUTE;
else if (error == EEXIST) {
/*
* the fact the route already exists is not an error.
*/
error = 0;
}
}
return error;
}
static int
in_rt_ifa_matcher(struct rtentry *rt, void *v)
{
struct ifaddr *ifa = v;
if (rt->rt_ifa == ifa)
return 1;
else
return 0;
}
/*
* remove a route to prefix ("connected route" in cisco terminology).
* re-installs the route by using another interface address, if there's one
* with the same prefix (otherwise we lose the route mistakenly).
*/
static int
in_scrubprefix(struct in_ifaddr *target)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p;
int error;
int s;
/* If we don't have IFA_ROUTE we have nothing to do */
if ((target->ia_flags & IFA_ROUTE) == 0)
return 0;
if (rtinitflags(target))
prefix = target->ia_dstaddr.sin_addr;
else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
}
s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(ia) {
if (rtinitflags(ia))
p = ia->ia_dstaddr.sin_addr;
else {
p = ia->ia_addr.sin_addr;
p.s_addr &= ia->ia_sockmask.sin_addr.s_addr;
}
if (prefix.s_addr != p.s_addr)
continue;
if ((ia->ia_ifp->if_flags & IFF_UNNUMBERED))
continue;
/*
* if we got a matching prefix route, move IFA_ROUTE to it
*/
if ((ia->ia_flags & IFA_ROUTE) == 0) {
struct psref psref;
int bound = curlwp_bind();
ia4_acquire(ia, &psref);
pserialize_read_exit(s);
rtinit(&target->ia_ifa, RTM_DELETE,
rtinitflags(target));
target->ia_flags &= ~IFA_ROUTE;
error = rtinit(&ia->ia_ifa, RTM_ADD,
rtinitflags(ia) | RTF_UP);
if (error == 0)
ia->ia_flags |= IFA_ROUTE;
if (!ISSET(target->ia_ifa.ifa_flags, IFA_DESTROYING))
goto skip;
/*
* Replace rt_ifa of routes that have the removing address
* with the new address.
*/
rt_replace_ifa_matched_entries(AF_INET,
in_rt_ifa_matcher, &target->ia_ifa, &ia->ia_ifa);
skip:
ia4_release(ia, &psref);
curlwp_bindx(bound);
return error;
}
}
pserialize_read_exit(s);
/*
* no one seems to have the prefix route; remove it.
*/
rtinit(&target->ia_ifa, RTM_DELETE, rtinitflags(target));
target->ia_flags &= ~IFA_ROUTE;
if (ISSET(target->ia_ifa.ifa_flags, IFA_DESTROYING)) {
/* Remove routes that have the removing address as rt_ifa. */
rt_delete_matched_entries(AF_INET, in_rt_ifa_matcher,
&target->ia_ifa, true);
}
return 0;
}
#undef rtinitflags
/*
* Return 1 if the address might be a local broadcast address.
*/
int
in_broadcast(struct in_addr in, struct ifnet *ifp)
{
struct ifaddr *ifa;
int s;
KASSERT(ifp != NULL);
if (in.s_addr == INADDR_BROADCAST ||
in_nullhost(in))
return 1;
if ((ifp->if_flags & IFF_BROADCAST) == 0)
return 0;
/*
* Look through the list of addresses for a match
* with a broadcast address.
*/
#define ia (ifatoia(ifa))
s = pserialize_read_enter();
IFADDR_READER_FOREACH(ifa, ifp) {
if (ifa->ifa_addr->sa_family == AF_INET &&
!in_hosteq(in, ia->ia_addr.sin_addr) &&
(in_hosteq(in, ia->ia_broadaddr.sin_addr) ||
in_hosteq(in, ia->ia_netbroadcast) ||
(hostzeroisbroadcast &&
/*
* Check for old-style (host 0) broadcast, but
* taking into account that RFC 3021 obsoletes it.
*/
ia->ia_subnetmask != IN_RFC3021_MASK &&
(in.s_addr == ia->ia_subnet ||
in.s_addr == ia->ia_net)))) {
pserialize_read_exit(s);
return 1;
}
}
pserialize_read_exit(s);
return (0);
#undef ia
}
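/*
 * Example (editor's illustration): on an IFF_BROADCAST interface
 * configured as 192.0.2.1/24, in_broadcast() returns 1 for
 * 255.255.255.255 and 192.0.2.255, and for the old-style host-zero
 * address 192.0.2.0 only when hostzeroisbroadcast is enabled.
 */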
/*
* perform DAD when interface becomes IFF_UP.
*/
void
in_if_link_up(struct ifnet *ifp)
{
struct ifaddr *ifa;
struct in_ifaddr *ia;
int s, bound;
/* Ensure it's sane to run DAD */
if (ifp->if_link_state == LINK_STATE_DOWN)
return;
if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
return;
bound = curlwp_bind();
s = pserialize_read_enter();
IFADDR_READER_FOREACH(ifa, ifp) {
struct psref psref;
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ifa_acquire(ifa, &psref);
pserialize_read_exit(s);
ia = (struct in_ifaddr *)ifa;
/* If detached then mark as tentative */
if (ia->ia4_flags & IN_IFF_DETACHED) {
ia->ia4_flags &= ~IN_IFF_DETACHED;
if (ip_dad_enabled() && if_do_dad(ifp) &&
ia->ia_dad_start != NULL)
ia->ia4_flags |= IN_IFF_TENTATIVE;
else if ((ia->ia4_flags & IN_IFF_TENTATIVE) == 0)
rt_addrmsg(RTM_NEWADDR, ifa);
}
if (ia->ia4_flags & IN_IFF_TENTATIVE) {
/* Clear the duplicated flag as we're starting DAD. */
ia->ia4_flags &= ~IN_IFF_DUPLICATED;
ia->ia_dad_start(ifa);
}
s = pserialize_read_enter();
ifa_release(ifa, &psref);
}
pserialize_read_exit(s);
curlwp_bindx(bound);
}
void
in_if_up(struct ifnet *ifp)
{
/* interface may not support link state, so bring it up also */
in_if_link_up(ifp);
}
/*
* Mark all addresses as detached.
*/
void
in_if_link_down(struct ifnet *ifp)
{
struct ifaddr *ifa;
struct in_ifaddr *ia;
int s, bound;
bound = curlwp_bind();
s = pserialize_read_enter();
IFADDR_READER_FOREACH(ifa, ifp) {
struct psref psref;
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ifa_acquire(ifa, &psref);
pserialize_read_exit(s);
ia = (struct in_ifaddr *)ifa;
/* Stop DAD processing */
if (ia->ia_dad_stop != NULL)
ia->ia_dad_stop(ifa);
/*
* Mark the address as detached.
*/
if (!(ia->ia4_flags & IN_IFF_DETACHED)) {
ia->ia4_flags |= IN_IFF_DETACHED;
ia->ia4_flags &=
~(IN_IFF_TENTATIVE | IN_IFF_DUPLICATED);
rt_addrmsg(RTM_NEWADDR, ifa);
}
s = pserialize_read_enter();
ifa_release(ifa, &psref);
}
pserialize_read_exit(s);
curlwp_bindx(bound);
}
void
in_if_down(struct ifnet *ifp)
{
in_if_link_down(ifp);
#if NARP > 0
lltable_purge_entries(LLTABLE(ifp));
#endif
}
void
in_if_link_state_change(struct ifnet *ifp, int link_state)
{
/*
* Treat LINK_STATE_UNKNOWN as UP.
* LINK_STATE_UNKNOWN transitions to LINK_STATE_DOWN when
* if_link_state_change() transitions to LINK_STATE_UP.
*/
if (link_state == LINK_STATE_DOWN)
in_if_link_down(ifp);
else
in_if_link_up(ifp);
}
/*
* in_lookup_multi: look up the in_multi record for a given IP
* multicast address on a given interface. If no matching record is
* found, return NULL.
*/
struct in_multi *
in_lookup_multi(struct in_addr addr, ifnet_t *ifp)
{
struct in_multi *inm;
KASSERT(rw_lock_held(&in_multilock));
LIST_FOREACH(inm, &IN_MULTI_HASH(addr.s_addr, ifp), inm_list) {
if (in_hosteq(inm->inm_addr, addr) && inm->inm_ifp == ifp)
break;
}
return inm;
}
/*
* in_multi_group: check whether the address belongs to an IP multicast
* group we are joined on this interface. Returns true or false.
*/
bool
in_multi_group(struct in_addr addr, ifnet_t *ifp, int flags)
{
bool ingroup;
if (__predict_true(flags & IP_IGMP_MCAST) == 0) {
rw_enter(&in_multilock, RW_READER);
ingroup = in_lookup_multi(addr, ifp) != NULL;
rw_exit(&in_multilock);
} else {
/* XXX Recursive call from ip_output(). */
KASSERT(rw_lock_held(&in_multilock));
ingroup = in_lookup_multi(addr, ifp) != NULL;
}
return ingroup;
}
/*
* Add an address to the list of IP multicast addresses for a given interface.
*/
struct in_multi *
in_addmulti(struct in_addr *ap, ifnet_t *ifp)
{
struct sockaddr_in sin;
struct in_multi *inm;
/*
* See if address already in list.
*/
rw_enter(&in_multilock, RW_WRITER);
inm = in_lookup_multi(*ap, ifp);
if (inm != NULL) {
/*
* Found it; just increment the reference count.
*/
inm->inm_refcount++;
rw_exit(&in_multilock);
return inm;
}
/*
* New address; allocate a new multicast record.
*/
inm = pool_get(&inmulti_pool, PR_NOWAIT);
if (inm == NULL) {
rw_exit(&in_multilock);
return NULL;
}
inm->inm_addr = *ap;
inm->inm_ifp = ifp;
inm->inm_refcount = 1;
/*
* Ask the network driver to update its multicast reception
* filter appropriately for the new address.
*/
sockaddr_in_init(&sin, ap, 0);
if (if_mcast_op(ifp, SIOCADDMULTI, sintosa(&sin)) != 0) {
rw_exit(&in_multilock);
pool_put(&inmulti_pool, inm);
return NULL;
}
/*
* Let IGMP know that we have joined a new IP multicast group.
*/
if (igmp_joingroup(inm) != 0) {
rw_exit(&in_multilock);
pool_put(&inmulti_pool, inm);
return NULL;
}
LIST_INSERT_HEAD(
&IN_MULTI_HASH(inm->inm_addr.s_addr, ifp),
inm, inm_list);
in_multientries++;
rw_exit(&in_multilock);
return inm;
}
/*
* Delete a multicast address record.
*/
void
in_delmulti(struct in_multi *inm)
{
struct sockaddr_in sin;
rw_enter(&in_multilock, RW_WRITER);
if (--inm->inm_refcount > 0) {
rw_exit(&in_multilock);
return;
}
/*
* No remaining claims to this record; let IGMP know that
* we are leaving the multicast group.
*/
igmp_leavegroup(inm);
/*
* Notify the network driver to update its multicast reception
* filter.
*/
sockaddr_in_init(&sin, &inm->inm_addr, 0);
if_mcast_op(inm->inm_ifp, SIOCDELMULTI, sintosa(&sin));
/*
* Unlink from list.
*/
LIST_REMOVE(inm, inm_list);
in_multientries--;
rw_exit(&in_multilock);
pool_put(&inmulti_pool, inm);
}
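/*
* Usage sketch (illustrative only, not lifted from a specific caller):
* code that joins a group keeps the returned in_multi reference and
* releases it with in_delmulti() when leaving:
*
*	struct in_multi *inm;
*
*	inm = in_addmulti(&addr, ifp);
*	if (inm == NULL)
*		return ENOBUFS;
*	...
*	in_delmulti(inm);
*/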
/*
* in_next_multi: step through all of the in_multi records, one at a time.
* The current position is remembered in "step", which the caller must
* provide. in_first_multi(), below, must be called to initialize "step"
* and get the first record. Both functions return a NULL "inm" when there
* are no remaining records.
*/
struct in_multi *
in_next_multi(struct in_multistep *step)
{
struct in_multi *inm;
KASSERT(rw_lock_held(&in_multilock));
while (step->i_inm == NULL && step->i_n < IN_MULTI_HASH_SIZE) {
step->i_inm = LIST_FIRST(&in_multihashtbl[++step->i_n]);
}
if ((inm = step->i_inm) != NULL) {
step->i_inm = LIST_NEXT(inm, inm_list);
}
return inm;
}
struct in_multi *
in_first_multi(struct in_multistep *step)
{
KASSERT(rw_lock_held(&in_multilock));
step->i_n = 0;
step->i_inm = LIST_FIRST(&in_multihashtbl[0]);
return in_next_multi(step);
}
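/*
* Illustrative walk over all records (a sketch; it assumes the caller
* holds in_multilock for the whole traversal, as the KASSERTs require):
*
*	struct in_multistep step;
*	struct in_multi *inm;
*
*	in_multi_lock(RW_READER);
*	for (inm = in_first_multi(&step); inm != NULL;
*	    inm = in_next_multi(&step)) {
*		...
*	}
*	in_multi_unlock();
*/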
void
in_multi_lock(int op)
{
rw_enter(&in_multilock, op);
}
void
in_multi_unlock(void)
{
rw_exit(&in_multilock);
}
int
in_multi_lock_held(void)
{
return rw_lock_held(&in_multilock);
}
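/*
* in_selectsrc: select a source address for a destination described by
* "sin".  A sketch of the caller contract, inferred from the asserts and
* psref handling below rather than from any particular caller:
*
*	struct psref psref;
*	struct in_ifaddr *ia;
*	int bound, error = 0;
*
*	bound = curlwp_bind();
*	ia = in_selectsrc(sin, &ro, soopts, mopts, &error, &psref);
*	if (ia == NULL)
*		... "error" holds the reason ...
*	else {
*		... use ia->ia_addr.sin_addr ...
*		ia4_release(ia, &psref);
*	}
*	curlwp_bindx(bound);
*/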
struct in_ifaddr *
in_selectsrc(struct sockaddr_in *sin, struct route *ro,
int soopts, struct ip_moptions *mopts, int *errorp, struct psref *psref)
{
struct rtentry *rt = NULL;
struct in_ifaddr *ia = NULL;
KASSERT(ISSET(curlwp->l_pflag, LP_BOUND));
/*
* If route is known or can be allocated now, take the
* source address from the interface. Otherwise, punt.
*/
if ((soopts & SO_DONTROUTE) != 0)
rtcache_free(ro);
else {
union {
struct sockaddr dst;
struct sockaddr_in dst4;
} u;
sockaddr_in_init(&u.dst4, &sin->sin_addr, 0);
rt = rtcache_lookup(ro, &u.dst);
}
/*
* If we found a route, use the address
* corresponding to the outgoing interface
* unless it is the loopback (in case a route
* to our address on another net goes to loopback).
*
* XXX Is this still true? Do we care?
*/
if (rt != NULL && (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
int s;
struct ifaddr *ifa;
/*
* Just in case. May not need to do this workaround.
* Revisit when working on rtentry MP-ification.
*/
s = pserialize_read_enter();
IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
if (ifa == rt->rt_ifa)
break;
}
if (ifa != NULL)
ifa_acquire(ifa, psref);
pserialize_read_exit(s);
ia = ifatoia(ifa);
}
if (ia == NULL) {
in_port_t fport = sin->sin_port;
struct ifaddr *ifa;
int s;
sin->sin_port = 0;
ifa = ifa_ifwithladdr_psref(sintosa(sin), psref);
sin->sin_port = fport;
if (ifa == NULL) {
/* Find the first non-loopback AF_INET address */
s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(ia) {
if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK))
break;
}
if (ia != NULL)
ia4_acquire(ia, psref);
pserialize_read_exit(s);
} else {
/* ia is already referenced by psref */
ia = ifatoia(ifa);
}
if (ia == NULL) {
*errorp = EADDRNOTAVAIL;
goto out;
}
}
/*
* If the destination address is multicast and an outgoing
* interface has been set as a multicast option, use the
* address of that interface as our source address.
*/
if (IN_MULTICAST(sin->sin_addr.s_addr) && mopts != NULL) {
struct ip_moptions *imo;
imo = mopts;
if (imo->imo_multicast_if_index != 0) {
struct ifnet *ifp;
int s;
if (ia != NULL)
ia4_release(ia, psref);
s = pserialize_read_enter();
ifp = if_byindex(imo->imo_multicast_if_index);
if (ifp != NULL) {
/* XXX */
ia = in_get_ia_from_ifp_psref(ifp, psref);
} else
ia = NULL;
if (ia == NULL || ia->ia4_flags & IN_IFF_NOTREADY) {
pserialize_read_exit(s);
if (ia != NULL)
ia4_release(ia, psref);
*errorp = EADDRNOTAVAIL;
ia = NULL;
goto out;
}
pserialize_read_exit(s);
}
}
if (ia->ia_ifa.ifa_getifa != NULL) {
ia = ifatoia((*ia->ia_ifa.ifa_getifa)(&ia->ia_ifa,
sintosa(sin)));
if (ia == NULL) {
*errorp = EADDRNOTAVAIL;
goto out;
}
/* FIXME NOMPSAFE */
ia4_acquire(ia, psref);
}
#ifdef GETIFA_DEBUG
else
printf("%s: missing ifa_getifa\n", __func__);
#endif
out:
rtcache_unref(rt, ro);
return ia;
}
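/*
* in_tunnel_validate: validate the outer IPv4 header of a tunnelled
* packet against the tunnel endpoints "src" and "dst".  Returns 0 if
* the packet must be rejected, otherwise the number of address bytes
* validated (sizeof(src) + sizeof(dst)).
*/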
int
in_tunnel_validate(const struct ip *ip, struct in_addr src, struct in_addr dst)
{
struct in_ifaddr *ia4;
int s;
/* check for address match */
if (src.s_addr != ip->ip_dst.s_addr ||
dst.s_addr != ip->ip_src.s_addr)
return 0;
/* martian filters on outer source - NOT done in ip_input! */
if (IN_MULTICAST(ip->ip_src.s_addr))
return 0;
switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
case 0:
case 127:
case 255:
return 0;
}
/* reject packets with broadcast on source */
s = pserialize_read_enter();
IN_ADDRLIST_READER_FOREACH(ia4) {
if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
continue;
if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
pserialize_read_exit(s);
return 0;
}
}
pserialize_read_exit(s);
/* NOTE: the packet may be dropped by uRPF */
/* return the number of valid address bytes */
return sizeof(src) + sizeof(dst);
}
#if NARP > 0
#define IN_LLTBL_DEFAULT_HSIZE 32
#define IN_LLTBL_HASH(k, h) \
(((((((k >> 8) ^ k) >> 8) ^ k) >> 8) ^ k) & ((h) - 1))
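/*
* The hash folds the four bytes of the address into each other with
* successive xors and then masks the result, so the table size "h"
* must be a power of two (IN_LLTBL_DEFAULT_HSIZE is).
*/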
/*
* Do actual deallocation of @lle.
* Called by LLE_FREE_LOCKED when number of references
* drops to zero.
*/
static void
in_lltable_destroy_lle(struct llentry *lle)
{
KASSERTMSG(lle->la_numheld == 0, "la_numheld=%d", lle->la_numheld);
LLE_WUNLOCK(lle);
LLE_LOCK_DESTROY(lle);
llentry_pool_put(lle);
}
static struct llentry *
in_lltable_new(struct in_addr addr4, u_int flags)
{
struct llentry *lle;
lle = llentry_pool_get(PR_NOWAIT);
if (lle == NULL) /* NB: caller generates msg */
return NULL;
lle->r_l3addr.addr4 = addr4;
lle->lle_refcnt = 1;
lle->lle_free = in_lltable_destroy_lle;
LLE_LOCK_INIT(lle);
callout_init(&lle->la_timer, CALLOUT_MPSAFE);
return lle;
}
#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
(((ntohl((d).s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
static int
in_lltable_match_prefix(const struct sockaddr *prefix,
const struct sockaddr *mask, u_int flags, struct llentry *lle)
{
const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix;
const struct sockaddr_in *msk = (const struct sockaddr_in *)mask;
struct in_addr lle_addr;
lle_addr.s_addr = ntohl(lle->r_l3addr.addr4.s_addr);
/*
* (flags & LLE_STATIC) means deleting all entries
* including static ARP entries.
*/
if (IN_ARE_MASKED_ADDR_EQUAL(lle_addr, pfx, msk) &&
((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC)))
return (1);
return (0);
}
static void
in_lltable_free_entry(struct lltable *llt, struct llentry *lle)
{
size_t pkts_dropped;
LLE_WLOCK_ASSERT(lle);
KASSERT(llt != NULL);
pkts_dropped = llentry_free(lle);
arp_stat_add(ARP_STAT_DFRDROPPED, (uint64_t)pkts_dropped);
}
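/*
* in_lltable_rtcheck: before creating an ARP cache entry, check that
* the given route really covers l3addr on this interface (or is the
* special matching host route described below).  Returns 0 if so,
* EINVAL otherwise.
*/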
static int
in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr,
const struct rtentry *rt)
{
int error = EINVAL;
if (rt == NULL)
return error;
/*
* If there is an existing host route whose gateway matches the
* target L3 address (a special route inserted by some
* implementations, e.g. MANET), and the interface is of the
* correct type, then allow ARP to proceed.
*/
if (rt->rt_flags & RTF_GATEWAY) {
if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
rt->rt_ifp->if_type != IFT_ETHER ||
(rt->rt_ifp->if_flags & IFF_NOARP) != 0 ||
memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
sizeof(in_addr_t)) != 0) {
goto error;
}
}
/*
* Make sure that at least the destination address is covered
* by the route. This is for handling the case where 2 or more
* interfaces have the same prefix. An incoming packet arrives
* on one interface and the corresponding outgoing packet leaves
* another interface.
*/
if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
const char *sa, *mask, *addr, *lim;
int len;
mask = (const char *)rt_mask(rt);
/*
* Be extra cautious: a route installed without a mask
* (e.g. by custom code) would break the comparison below.
*/
if (mask == NULL)
goto error;
sa = (const char *)rt_getkey(rt);
addr = (const char *)l3addr;
len = ((const struct sockaddr_in *)l3addr)->sin_len;
lim = addr + len;
for ( ; addr < lim; sa++, mask++, addr++) {
if ((*sa ^ *addr) & *mask) {
#ifdef DIAGNOSTIC
log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
#endif
goto error;
}
}
}
error = 0;
error:
return error;
}
static inline uint32_t
in_lltable_hash_dst(const struct in_addr dst, uint32_t hsize)
{
return (IN_LLTBL_HASH(dst.s_addr, hsize));
}
static uint32_t
in_lltable_hash(const struct llentry *lle, uint32_t hsize)
{
return (in_lltable_hash_dst(lle->r_l3addr.addr4, hsize));
}
static void
in_lltable_fill_sa_entry(const struct llentry *lle, struct sockaddr *sa)
{
struct sockaddr_in *sin;
sin = (struct sockaddr_in *)sa;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_addr = lle->r_l3addr.addr4;
}
static inline struct llentry *
in_lltable_find_dst(struct lltable *llt, struct in_addr dst)
{
struct llentry *lle;
struct llentries *lleh;
u_int hashidx;
hashidx = in_lltable_hash_dst(dst, llt->llt_hsize);
lleh = &llt->lle_head[hashidx];
LIST_FOREACH(lle, lleh, lle_next) {
if (lle->la_flags & LLE_DELETED)
continue;
if (lle->r_l3addr.addr4.s_addr == dst.s_addr)
break;
}
return (lle);
}
static int
in_lltable_delete(struct lltable *llt, u_int flags,
const struct sockaddr *l3addr)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct ifnet *ifp __diagused = llt->llt_ifp;
struct llentry *lle;
IF_AFDATA_WLOCK_ASSERT(ifp);
KASSERTMSG(l3addr->sa_family == AF_INET,
"sin_family %d", l3addr->sa_family);
lle = in_lltable_find_dst(llt, sin->sin_addr);
if (lle == NULL) {
#ifdef LLTABLE_DEBUG
char buf[64];
sockaddr_format(l3addr, buf, sizeof(buf));
log(LOG_INFO, "%s: cache for %s is not found\n",
__func__, buf);
#endif
return (ENOENT);
}
LLE_WLOCK(lle);
#ifdef LLTABLE_DEBUG
{
char buf[64];
sockaddr_format(l3addr, buf, sizeof(buf));
log(LOG_INFO, "%s: cache for %s (%p) is deleted\n",
__func__, buf, lle);
}
#endif
llentry_free(lle);
return (0);
}
static struct llentry *
in_lltable_create(struct lltable *llt, u_int flags, const struct sockaddr *l3addr,
const struct rtentry *rt)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
IF_AFDATA_WLOCK_ASSERT(ifp);
KASSERTMSG(l3addr->sa_family == AF_INET,
"sin_family %d", l3addr->sa_family);
lle = in_lltable_find_dst(llt, sin->sin_addr);
if (lle != NULL) {
LLE_WLOCK(lle);
return (lle);
}
/* No existing record; we need to create a new one. */
/*
* A route that covers the given address must have been
* installed first, because we are doing a resolution;
* verify this.
*/
if (!(flags & LLE_IFADDR) &&
in_lltable_rtcheck(ifp, flags, l3addr, rt) != 0)
return (NULL);
lle = in_lltable_new(sin->sin_addr, flags);
if (lle == NULL) {
log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
return (NULL);
}
lle->la_flags = flags;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
memcpy(&lle->ll_addr, CLLADDR(ifp->if_sadl), ifp->if_addrlen);
lle->la_flags |= (LLE_VALID | LLE_STATIC);
}
lltable_link_entry(llt, lle);
LLE_WLOCK(lle);
return (lle);
}
/*
* Return NULL if not found or marked for deletion.
* If found return lle read locked.
*/
static struct llentry *
in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct llentry *lle;
IF_AFDATA_LOCK_ASSERT(llt->llt_ifp);
KASSERTMSG(l3addr->sa_family == AF_INET,
"sin_family %d", l3addr->sa_family);
lle = in_lltable_find_dst(llt, sin->sin_addr);
if (lle == NULL)
return NULL;
if (flags & LLE_EXCLUSIVE)
LLE_WLOCK(lle);
else
LLE_RLOCK(lle);
return lle;
}
static int
in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
struct rt_walkarg *w)
{
struct sockaddr_in sin;
LLTABLE_LOCK_ASSERT();
/* skip deleted entries */
if (lle->la_flags & LLE_DELETED)
return 0;
sockaddr_in_init(&sin, &lle->r_l3addr.addr4, 0);
return lltable_dump_entry(llt, lle, w, sintosa(&sin));
}
#endif /* NARP > 0 */
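/*
* in_multicast_sysctl: dump the multicast groups joined on the given
* interface.  Each record written to userland is, in order: the
* interface address (struct in_addr), the group address (struct
* in_addr) and the reference count (uint32_t).  A NULL oldp only
* computes the required length.
*/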
static int
in_multicast_sysctl(SYSCTLFN_ARGS)
{
struct ifnet *ifp;
struct ifaddr *ifa;
struct in_ifaddr *ifa4;
struct in_multi *inm;
uint32_t tmp;
int error;
size_t written;
struct psref psref;
int bound;
if (namelen != 1)
return EINVAL;
bound = curlwp_bind();
ifp = if_get_byindex(name[0], &psref);
if (ifp == NULL) {
curlwp_bindx(bound);
return ENODEV;
}
if (oldp == NULL) {
*oldlenp = 0;
IFADDR_FOREACH(ifa, ifp) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ifa4 = (void *)ifa;
LIST_FOREACH(inm, &ifa4->ia_multiaddrs, inm_list) {
*oldlenp += 2 * sizeof(struct in_addr) +
sizeof(uint32_t);
}
}
if_put(ifp, &psref);
curlwp_bindx(bound);
return 0;
}
error = 0;
written = 0;
IFADDR_FOREACH(ifa, ifp) {
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
ifa4 = (void *)ifa;
LIST_FOREACH(inm, &ifa4->ia_multiaddrs, inm_list) {
if (written + 2 * sizeof(struct in_addr) +
sizeof(uint32_t) > *oldlenp)
goto done;
error = sysctl_copyout(l, &ifa4->ia_addr.sin_addr,
oldp, sizeof(struct in_addr));
if (error)
goto done;
oldp = (char *)oldp + sizeof(struct in_addr);
written += sizeof(struct in_addr);
error = sysctl_copyout(l, &inm->inm_addr,
oldp, sizeof(struct in_addr));
if (error)
goto done;
oldp = (char *)oldp + sizeof(struct in_addr);
written += sizeof(struct in_addr);
tmp = inm->inm_refcount;
error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
if (error)
goto done;
oldp = (char *)oldp + sizeof(tmp);
written += sizeof(tmp);
}
}
done:
if_put(ifp, &psref);
curlwp_bindx(bound);
*oldlenp = written;
return error;
}
static void
in_sysctl_init(struct sysctllog **clog)
{
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_NODE, "inet",
SYSCTL_DESCR("PF_INET related settings"),
NULL, 0, NULL, 0,
CTL_NET, PF_INET, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_NODE, "multicast",
SYSCTL_DESCR("Multicast information"),
in_multicast_sysctl, 0, NULL, 0,
CTL_NET, PF_INET, CTL_CREATE, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
CTLTYPE_NODE, "ip",
SYSCTL_DESCR("IPv4 related settings"),
NULL, 0, NULL, 0,
CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
CTLTYPE_INT, "subnetsarelocal",
SYSCTL_DESCR("Whether logical subnets are considered "
"local"),
NULL, 0, &subnetsarelocal, 0,
CTL_NET, PF_INET, IPPROTO_IP,
IPCTL_SUBNETSARELOCAL, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
CTLTYPE_INT, "hostzerobroadcast",
SYSCTL_DESCR("All zeroes address is broadcast address"),
NULL, 0, &hostzeroisbroadcast, 0,
CTL_NET, PF_INET, IPPROTO_IP,
IPCTL_HOSTZEROBROADCAST, CTL_EOL);
}
#if NARP > 0
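/*
* in_lltattach: allocate the AF_INET lltable (the ARP cache) for the
* interface and wire up the AF_INET-specific operations.
*/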
static struct lltable *
in_lltattach(struct ifnet *ifp, struct in_ifinfo *ii)
{
struct lltable *llt;
llt = lltable_allocate_htbl(IN_LLTBL_DEFAULT_HSIZE);
llt->llt_af = AF_INET;
llt->llt_ifp = ifp;
llt->llt_lookup = in_lltable_lookup;
llt->llt_create = in_lltable_create;
llt->llt_delete = in_lltable_delete;
llt->llt_dump_entry = in_lltable_dump_entry;
llt->llt_hash = in_lltable_hash;
llt->llt_fill_sa_entry = in_lltable_fill_sa_entry;
llt->llt_free_entry = in_lltable_free_entry;
llt->llt_match_prefix = in_lltable_match_prefix;
#ifdef MBUFTRACE
struct mowner *mowner = &ii->ii_mowner;
mowner_init_owner(mowner, ifp->if_xname, "arp");
MOWNER_ATTACH(mowner);
llt->llt_mowner = mowner;
#endif
lltable_link(llt);
return (llt);
}
#endif /* NARP > 0 */
void *
in_domifattach(struct ifnet *ifp)
{
struct in_ifinfo *ii;
ii = kmem_zalloc(sizeof(struct in_ifinfo), KM_SLEEP);
#if NARP > 0
ii->ii_llt = in_lltattach(ifp, ii);
#endif
#ifdef IPSELSRC
ii->ii_selsrc = in_selsrc_domifattach(ifp);
KASSERT(ii->ii_selsrc != NULL);
#endif
return ii;
}
void
in_domifdetach(struct ifnet *ifp, void *aux)
{
struct in_ifinfo *ii = aux;
#ifdef IPSELSRC
in_selsrc_domifdetach(ifp, ii->ii_selsrc);
#endif
#if NARP > 0
lltable_free(ii->ii_llt);
#ifdef MBUFTRACE
MOWNER_DETACH(&ii->ii_mowner);
#endif
#endif
kmem_free(ii, sizeof(struct in_ifinfo));
}