2018-06-26 09:47:57 +03:00
|
|
|
/* $NetBSD: ip_carp.c,v 1.99 2018/06/26 06:48:03 msaitoh Exp $ */
|
2006-05-18 13:05:49 +04:00
|
|
|
/* $OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $ */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
|
|
|
|
* Copyright (c) 2003 Ryan McBride. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
|
|
|
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
|
* SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
|
|
|
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
|
|
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2015-08-25 01:21:26 +03:00
|
|
|
#ifdef _KERNEL_OPT
|
2010-08-11 01:46:12 +04:00
|
|
|
#include "opt_inet.h"
|
2014-04-04 16:53:04 +04:00
|
|
|
#include "opt_mbuftrace.h"
|
2015-08-25 01:21:26 +03:00
|
|
|
#endif
|
2010-08-11 01:46:12 +04:00
|
|
|
|
2007-12-11 15:29:11 +03:00
|
|
|
#include <sys/cdefs.h>
|
2018-06-26 09:47:57 +03:00
|
|
|
__KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.99 2018/06/26 06:48:03 msaitoh Exp $");
|
2007-12-11 15:29:11 +03:00
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
/*
|
|
|
|
* TODO:
|
|
|
|
* - iface reconfigure
|
|
|
|
* - support for hardware checksum calculations;
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/proc.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/callout.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <sys/errno.h>
|
|
|
|
#include <sys/device.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/kauth.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/ucred.h>
|
|
|
|
#include <sys/syslog.h>
|
|
|
|
#include <sys/acct.h>
|
First step of random number subsystem rework described in
<20111022023242.BA26F14A158@mail.netbsd.org>. This change includes
the following:
An initial cleanup and minor reorganization of the entropy pool
code in sys/dev/rnd.c and sys/dev/rndpool.c. Several bugs are
fixed. Some effort is made to accumulate entropy more quickly at
boot time.
A generic interface, "rndsink", is added, for stream generators to
request that they be re-keyed with good quality entropy from the pool
as soon as it is available.
The arc4random()/arc4randbytes() implementation in libkern is
adjusted to use the rndsink interface for rekeying, which helps
address the problem of low-quality keys at boot time.
An implementation of the FIPS 140-2 statistical tests for random
number generator quality is provided (libkern/rngtest.c). This
is based on Greg Rose's implementation from Qualcomm.
A new random stream generator, nist_ctr_drbg, is provided. It is
based on an implementation of the NIST SP800-90 CTR_DRBG by
Henric Jungheim. This generator uses AES in a modified counter
mode to generate a backtracking-resistant random stream.
An abstraction layer, "cprng", is provided for in-kernel consumers
of randomness. The arc4random/arc4randbytes API is deprecated for
in-kernel use. It is replaced by "cprng_strong". The current
cprng_fast implementation wraps the existing arc4random
implementation. The current cprng_strong implementation wraps the
new CTR_DRBG implementation. Both interfaces are rekeyed from
the entropy pool automatically at intervals justifiable from best
current cryptographic practice.
In some quick tests, cprng_fast() is about the same speed as
the old arc4randbytes(), and cprng_strong() is about 20% faster
than rnd_extract_data(). Performance is expected to improve.
The AES code in src/crypto/rijndael is no longer an optional
kernel component, as it is required by cprng_strong, which is
not an optional kernel component.
The entropy pool output is subjected to the rngtest tests at
startup time; if it fails, the system will reboot. There is
approximately a 3/10000 chance of a false positive from these
tests. Entropy pool _input_ from hardware random numbers is
subjected to the rngtest tests at attach time, as well as the
FIPS continuous-output test, to detect bad or stuck hardware
RNGs; if any are detected, they are detached, but the system
continues to run.
A problem with rndctl(8) is fixed -- datastructures with
pointers in arrays are no longer passed to userspace (this
was not a security problem, but rather a major issue for
compat32). A new kernel will require a new rndctl.
The sysctl kern.arandom() and kern.urandom() nodes are hooked
up to the new generators, but the /dev/*random pseudodevices
are not, yet.
Manual pages for the new kernel interfaces are forthcoming.
2011-11-20 02:51:18 +04:00
|
|
|
#include <sys/cprng.h>
|
2007-10-19 15:59:34 +04:00
|
|
|
#include <sys/cpu.h>
|
2017-11-22 10:40:45 +03:00
|
|
|
#include <sys/pserialize.h>
|
|
|
|
#include <sys/psref.h>
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/pfil.h>
|
|
|
|
#include <net/if_types.h>
|
|
|
|
#include <net/if_ether.h>
|
|
|
|
#include <net/route.h>
|
|
|
|
#include <net/netisr.h>
|
2008-04-23 09:26:50 +04:00
|
|
|
#include <net/net_stats.h>
|
2006-05-18 13:05:49 +04:00
|
|
|
#include <netinet/if_inarp.h>
|
2017-02-02 05:52:10 +03:00
|
|
|
#include <netinet/wqinput.h>
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
#if NFDDI > 0
|
|
|
|
#include <net/if_fddi.h>
|
|
|
|
#endif
|
|
|
|
#if NTOKEN > 0
|
|
|
|
#include <net/if_token.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef INET
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_systm.h>
|
|
|
|
#include <netinet/in_var.h>
|
|
|
|
#include <netinet/ip.h>
|
|
|
|
#include <netinet/ip_var.h>
|
|
|
|
|
|
|
|
#include <net/if_dl.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
#include <netinet/icmp6.h>
|
|
|
|
#include <netinet/ip6.h>
|
|
|
|
#include <netinet6/ip6_var.h>
|
|
|
|
#include <netinet6/nd6.h>
|
2008-03-15 19:44:03 +03:00
|
|
|
#include <netinet6/scope6_var.h>
|
2016-07-23 16:37:10 +03:00
|
|
|
#include <netinet6/in6_var.h>
|
2006-05-18 13:05:49 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <net/bpf.h>
|
|
|
|
|
|
|
|
#include <sys/sha1.h>
|
|
|
|
|
|
|
|
#include <netinet/ip_carp.h>
|
|
|
|
|
2015-08-20 17:40:16 +03:00
|
|
|
#include "ioconf.h"
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
struct carp_mc_entry {
|
|
|
|
LIST_ENTRY(carp_mc_entry) mc_entries;
|
|
|
|
union {
|
|
|
|
struct ether_multi *mcu_enm;
|
|
|
|
} mc_u;
|
|
|
|
struct sockaddr_storage mc_addr;
|
|
|
|
};
|
|
|
|
#define mc_enm mc_u.mcu_enm
|
|
|
|
|
|
|
|
struct carp_softc {
|
|
|
|
struct ethercom sc_ac;
|
|
|
|
#define sc_if sc_ac.ec_if
|
|
|
|
#define sc_carpdev sc_ac.ec_if.if_carpdev
|
|
|
|
int ah_cookie;
|
|
|
|
int lh_cookie;
|
|
|
|
struct ip_moptions sc_imo;
|
|
|
|
#ifdef INET6
|
|
|
|
struct ip6_moptions sc_im6o;
|
|
|
|
#endif /* INET6 */
|
|
|
|
TAILQ_ENTRY(carp_softc) sc_list;
|
|
|
|
|
|
|
|
enum { INIT = 0, BACKUP, MASTER } sc_state;
|
|
|
|
|
|
|
|
int sc_suppress;
|
|
|
|
int sc_bow_out;
|
|
|
|
|
|
|
|
int sc_sendad_errors;
|
|
|
|
#define CARP_SENDAD_MAX_ERRORS 3
|
|
|
|
int sc_sendad_success;
|
|
|
|
#define CARP_SENDAD_MIN_SUCCESS 3
|
|
|
|
|
|
|
|
int sc_vhid;
|
|
|
|
int sc_advskew;
|
|
|
|
int sc_naddrs;
|
|
|
|
int sc_naddrs6;
|
|
|
|
int sc_advbase; /* seconds */
|
|
|
|
int sc_init_counter;
|
|
|
|
u_int64_t sc_counter;
|
|
|
|
|
|
|
|
/* authentication */
|
|
|
|
#define CARP_HMAC_PAD 64
|
|
|
|
unsigned char sc_key[CARP_KEY_LEN];
|
|
|
|
unsigned char sc_pad[CARP_HMAC_PAD];
|
|
|
|
SHA1_CTX sc_sha1;
|
|
|
|
u_int32_t sc_hashkey[2];
|
|
|
|
|
|
|
|
struct callout sc_ad_tmo; /* advertisement timeout */
|
|
|
|
struct callout sc_md_tmo; /* master down timeout */
|
|
|
|
struct callout sc_md6_tmo; /* master down timeout */
|
|
|
|
|
|
|
|
LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
|
|
|
|
};
|
|
|
|
|
2014-07-31 06:37:25 +04:00
|
|
|
int carp_suppress_preempt = 0;
|
2014-07-31 04:56:23 +04:00
|
|
|
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */
|
2008-04-15 10:03:28 +04:00
|
|
|
|
|
|
|
static percpu_t *carpstat_percpu;
|
|
|
|
|
2008-04-23 09:26:50 +04:00
|
|
|
#define CARP_STATINC(x) _NET_STATINC(carpstat_percpu, x)
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2014-04-04 16:53:04 +04:00
|
|
|
#ifdef MBUFTRACE
/* mbuf owner tags for carp/carp6 rx and tx; only built with MBUFTRACE. */
static struct mowner	carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
static struct mowner	carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
static struct mowner	carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
static struct mowner	carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
#endif
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
struct carp_if {
|
|
|
|
TAILQ_HEAD(, carp_softc) vhif_vrs;
|
|
|
|
int vhif_nvrs;
|
|
|
|
|
|
|
|
struct ifnet *vhif_ifp;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * CARP_LOG(sc, s): emit one log line when carp_opts[CARPCTL_LOG] is set.
 *
 *	sc	carp_softc pointer, may be NULL; picks the "<ifname>: " or
 *		the generic "carp: " prefix.
 *	s	parenthesised addlog() argument list, e.g. ("fmt %d", val).
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement and cannot capture a following "else"; invoke it with a
 * trailing semicolon.  Note that sc is evaluated more than once.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/ 0)
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void carp_hmac_prepare(struct carp_softc *);
|
|
|
|
static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
|
|
|
|
unsigned char *);
|
|
|
|
static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
|
|
|
|
unsigned char *);
|
|
|
|
static void carp_setroute(struct carp_softc *, int);
|
|
|
|
static void carp_proto_input_c(struct mbuf *, struct carp_header *,
|
|
|
|
sa_family_t);
|
|
|
|
static void carpdetach(struct carp_softc *);
|
2018-03-21 18:33:25 +03:00
|
|
|
static void carp_prepare_ad(struct mbuf *, struct carp_softc *,
|
2014-07-31 04:56:23 +04:00
|
|
|
struct carp_header *);
|
|
|
|
static void carp_send_ad_all(void);
|
|
|
|
static void carp_send_ad(void *);
|
|
|
|
static void carp_send_arp(struct carp_softc *);
|
|
|
|
static void carp_master_down(void *);
|
|
|
|
static int carp_ioctl(struct ifnet *, u_long, void *);
|
|
|
|
static void carp_start(struct ifnet *);
|
|
|
|
static void carp_setrun(struct carp_softc *, sa_family_t);
|
|
|
|
static void carp_set_state(struct carp_softc *, int);
|
|
|
|
static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
|
2006-05-18 13:05:49 +04:00
|
|
|
enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void carp_multicast_cleanup(struct carp_softc *);
|
|
|
|
static int carp_set_ifp(struct carp_softc *, struct ifnet *);
|
|
|
|
static void carp_set_enaddr(struct carp_softc *);
|
|
|
|
#if 0
|
|
|
|
static void carp_addr_updated(void *);
|
|
|
|
#endif
|
|
|
|
static u_int32_t carp_hash(struct carp_softc *, u_char *);
|
|
|
|
static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
|
|
|
|
static int carp_join_multicast(struct carp_softc *);
|
2006-05-18 13:05:49 +04:00
|
|
|
#ifdef INET6
|
2014-07-31 04:56:23 +04:00
|
|
|
static void carp_send_na(struct carp_softc *);
|
|
|
|
static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
|
|
|
|
static int carp_join_multicast6(struct carp_softc *);
|
2006-05-18 13:05:49 +04:00
|
|
|
#endif
|
2014-07-31 04:56:23 +04:00
|
|
|
static int carp_clone_create(struct if_clone *, int);
|
|
|
|
static int carp_clone_destroy(struct ifnet *);
|
|
|
|
static int carp_ether_addmulti(struct carp_softc *, struct ifreq *);
|
|
|
|
static int carp_ether_delmulti(struct carp_softc *, struct ifreq *);
|
|
|
|
static void carp_ether_purgemulti(struct carp_softc *);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void sysctl_net_inet_carp_setup(struct sysctllog **);
|
2009-09-16 19:23:04 +04:00
|
|
|
|
2017-02-02 05:52:10 +03:00
|
|
|
/* workqueue-based pr_input: one wqinput handle and handler per family */
static struct wqinput	*carp_wqinput;
static void		_carp_proto_input(struct mbuf *, int, int);
#ifdef INET6
static struct wqinput	*carp6_wqinput;
static void		_carp6_proto_input(struct mbuf *, int, int);
#endif
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
struct if_clone carp_cloner =
|
|
|
|
IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
|
|
|
|
|
|
|
|
static __inline u_int16_t
|
|
|
|
carp_cksum(struct mbuf *m, int len)
|
|
|
|
{
|
|
|
|
return (in_cksum(m, len));
|
|
|
|
}
|
|
|
|
|
2017-02-27 11:26:53 +03:00
|
|
|
static __inline u_int16_t
|
|
|
|
carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
|
|
|
|
{
|
|
|
|
return (in6_cksum(m, IPPROTO_CARP, off, len));
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_hmac_prepare(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
|
|
|
|
u_int8_t vhid = sc->sc_vhid & 0xff;
|
|
|
|
SHA1_CTX sha1ctx;
|
|
|
|
u_int32_t kmd[5];
|
|
|
|
struct ifaddr *ifa;
|
|
|
|
int i, found;
|
|
|
|
struct in_addr last, cur, in;
|
|
|
|
#ifdef INET6
|
|
|
|
struct in6_addr last6, cur6, in6;
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
/* compute ipad from key */
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
|
2006-05-18 13:05:49 +04:00
|
|
|
for (i = 0; i < sizeof(sc->sc_pad); i++)
|
|
|
|
sc->sc_pad[i] ^= 0x36;
|
|
|
|
|
|
|
|
/* precompute first part of inner hash */
|
|
|
|
SHA1Init(&sc->sc_sha1);
|
|
|
|
SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
|
|
|
|
SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
|
|
|
|
SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
|
|
|
|
|
|
|
|
/* generate a key for the arpbalance hash, before the vhid is hashed */
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
|
2006-05-18 13:05:49 +04:00
|
|
|
SHA1Final((unsigned char *)kmd, &sha1ctx);
|
|
|
|
sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
|
|
|
|
sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
|
|
|
|
|
|
|
|
/* the rest of the precomputation */
|
|
|
|
SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
|
|
|
|
|
|
|
|
/* Hash the addresses from smallest to largest, not interface order */
|
|
|
|
#ifdef INET
|
|
|
|
cur.s_addr = 0;
|
|
|
|
do {
|
2017-11-22 10:40:45 +03:00
|
|
|
int s;
|
2006-05-18 13:05:49 +04:00
|
|
|
found = 0;
|
|
|
|
last = cur;
|
|
|
|
cur.s_addr = 0xffffffff;
|
2017-11-22 10:40:45 +03:00
|
|
|
s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
|
2006-05-18 13:05:49 +04:00
|
|
|
in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
|
|
|
|
if (ifa->ifa_addr->sa_family == AF_INET &&
|
|
|
|
ntohl(in.s_addr) > ntohl(last.s_addr) &&
|
|
|
|
ntohl(in.s_addr) < ntohl(cur.s_addr)) {
|
|
|
|
cur.s_addr = in.s_addr;
|
|
|
|
found++;
|
|
|
|
}
|
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (found)
|
|
|
|
SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
|
|
|
|
} while (found);
|
|
|
|
#endif /* INET */
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
memset(&cur6, 0x00, sizeof(cur6));
|
|
|
|
do {
|
2017-11-22 10:40:45 +03:00
|
|
|
int s;
|
2006-05-18 13:05:49 +04:00
|
|
|
found = 0;
|
|
|
|
last6 = cur6;
|
|
|
|
memset(&cur6, 0xff, sizeof(cur6));
|
2017-11-22 10:40:45 +03:00
|
|
|
s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
|
2006-05-18 13:05:49 +04:00
|
|
|
in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
|
|
|
|
if (IN6_IS_ADDR_LINKLOCAL(&in6))
|
|
|
|
in6.s6_addr16[1] = 0;
|
|
|
|
if (ifa->ifa_addr->sa_family == AF_INET6 &&
|
|
|
|
memcmp(&in6, &last6, sizeof(in6)) > 0 &&
|
|
|
|
memcmp(&in6, &cur6, sizeof(in6)) < 0) {
|
|
|
|
cur6 = in6;
|
|
|
|
found++;
|
|
|
|
}
|
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (found)
|
|
|
|
SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
|
|
|
|
} while (found);
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
/* convert ipad to opad */
|
|
|
|
for (i = 0; i < sizeof(sc->sc_pad); i++)
|
|
|
|
sc->sc_pad[i] ^= 0x36 ^ 0x5c;
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
|
|
|
|
unsigned char md[20])
|
|
|
|
{
|
|
|
|
SHA1_CTX sha1ctx;
|
|
|
|
|
|
|
|
/* fetch first half of inner hash */
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
|
|
|
|
SHA1Final(md, &sha1ctx);
|
|
|
|
|
|
|
|
/* outer hash */
|
|
|
|
SHA1Init(&sha1ctx);
|
|
|
|
SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
|
|
|
|
SHA1Update(&sha1ctx, md, 20);
|
|
|
|
SHA1Final(md, &sha1ctx);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
|
|
|
|
unsigned char md[20])
|
|
|
|
{
|
|
|
|
unsigned char md2[20];
|
|
|
|
|
|
|
|
carp_hmac_generate(sc, counter, md2);
|
|
|
|
|
2009-03-18 18:14:29 +03:00
|
|
|
return (memcmp(md, md2, sizeof(md2)));
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_setroute(struct carp_softc *sc, int cmd)
|
|
|
|
{
|
|
|
|
struct ifaddr *ifa;
|
2017-11-22 10:40:45 +03:00
|
|
|
int s, bound;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_LOCK(1, NULL);
|
2017-11-22 10:40:45 +03:00
|
|
|
bound = curlwp_bind();
|
|
|
|
s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
|
2017-11-22 10:40:45 +03:00
|
|
|
struct psref psref;
|
|
|
|
ifa_acquire(ifa, &psref);
|
|
|
|
pserialize_read_exit(s);
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
switch (ifa->ifa_addr->sa_family) {
|
|
|
|
case AF_INET: {
|
|
|
|
int count = 0;
|
|
|
|
struct rtentry *rt;
|
|
|
|
int hr_otherif, nr_ourif;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Avoid screwing with the routes if there are other
|
|
|
|
* carp interfaces which are master and have the same
|
|
|
|
* address.
|
|
|
|
*/
|
|
|
|
if (sc->sc_carpdev != NULL &&
|
|
|
|
sc->sc_carpdev->if_carp != NULL) {
|
|
|
|
count = carp_addrcount(
|
|
|
|
(struct carp_if *)sc->sc_carpdev->if_carp,
|
|
|
|
ifatoia(ifa), CARP_COUNT_MASTER);
|
|
|
|
if ((cmd == RTM_ADD && count != 1) ||
|
|
|
|
(cmd == RTM_DELETE && count != 0))
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove the existing host route, if any */
|
|
|
|
rtrequest(RTM_DELETE, ifa->ifa_addr,
|
|
|
|
ifa->ifa_addr, ifa->ifa_netmask,
|
|
|
|
RTF_HOST, NULL);
|
|
|
|
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
rt = NULL;
|
|
|
|
(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
|
|
|
|
ifa->ifa_netmask, RTF_HOST, &rt);
|
2006-05-18 13:05:49 +04:00
|
|
|
hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
|
2016-04-04 10:37:07 +03:00
|
|
|
(rt->rt_flags & RTF_CONNECTED));
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
if (rt != NULL) {
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwlock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry is managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such as rtalloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1} so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequest{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequest{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of a updater is simpler to destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
rt = NULL;
|
|
|
|
}
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
/* Check for a network route on our interface */
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
|
|
|
|
rt = NULL;
|
|
|
|
(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
|
|
|
|
ifa->ifa_netmask, 0, &rt);
|
2006-05-18 13:05:49 +04:00
|
|
|
nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
|
|
|
|
|
|
|
|
switch (cmd) {
|
|
|
|
case RTM_ADD:
|
|
|
|
if (hr_otherif) {
|
|
|
|
ifa->ifa_rtrequest = NULL;
|
2016-04-04 10:37:07 +03:00
|
|
|
ifa->ifa_flags &= ~RTF_CONNECTED;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
rtrequest(RTM_ADD, ifa->ifa_addr,
|
|
|
|
ifa->ifa_addr, ifa->ifa_netmask,
|
|
|
|
RTF_UP | RTF_HOST, NULL);
|
|
|
|
}
|
|
|
|
if (!hr_otherif || nr_ourif || !rt) {
|
2016-04-04 10:37:07 +03:00
|
|
|
if (nr_ourif &&
|
|
|
|
(rt->rt_flags & RTF_CONNECTED) == 0)
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
rtrequest(RTM_DELETE,
|
|
|
|
ifa->ifa_addr,
|
2006-05-18 13:05:49 +04:00
|
|
|
ifa->ifa_addr,
|
|
|
|
ifa->ifa_netmask, 0, NULL);
|
|
|
|
|
|
|
|
ifa->ifa_rtrequest = arp_rtrequest;
|
2016-04-04 10:37:07 +03:00
|
|
|
ifa->ifa_flags |= RTF_CONNECTED;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (rtrequest(RTM_ADD, ifa->ifa_addr,
|
|
|
|
ifa->ifa_addr, ifa->ifa_netmask, 0,
|
|
|
|
NULL) == 0)
|
|
|
|
ifa->ifa_flags |= IFA_ROUTE;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case RTM_DELETE:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
if (rt != NULL) {
|
Make the routing table and rtcaches MP-safe
See the following descriptions for details.
Proposed on tech-kern and tech-net
Overview
--------
We protect the routing table with a rwlock and protect
rtcaches with another rwlock. Each rtentry is protected
from being freed or updated via reference counting and psref.
Global rwlocks
--------------
There are two rwlocks; one for the routing table (rt_lock) and
the other for rtcaches (rtcache_lock). rtcache_lock covers
all existing rtcaches; there may have room for optimizations
(future work).
The locking order is rtcache_lock first and rt_lock is next.
rtentry references
------------------
References to an rtentry are managed with reference counting
and psref. Either of the two mechanisms is used depending on
where a rtentry is obtained. Reference counting is used when
we obtain a rtentry from the routing table directly via
rtalloc1 and rtrequest{,1} while psref is used when we obtain
a rtentry from a rtcache via rtcache_* APIs. In both cases,
a caller can sleep/block with holding an obtained rtentry.
The reasons why we use two different mechanisms are (i) only
using reference counting hurts the performance due to atomic
instructions (rtcache case) (ii) ease of implementation;
applying psref to APIs such as rtalloc1 and rtrequest{,1} requires
additional works (adding a local variable and an argument).
We will finally migrate to use only psref but we can do it
when we have a lockless routing table alternative.
Reference counting for rtentry
------------------------------
rt_refcnt now doesn't count permanent references such as for
rt_timers and rtcaches, instead it is used only for temporal
references when obtaining a rtentry via rtalloc1 and rtrequest{,1}.
We can do so because destroying a rtentry always involves
removing references of rt_timers and rtcaches to the rtentry
and we don't need to track such references. This also makes
it easy to wait for readers to release references on deleting
or updating a rtentry, i.e., we can simply wait until the
reference counter is 0 or 1. (If there are permanent references
the counter can be arbitrary.)
rt_ref increments a reference counter of a rtentry and rt_unref
decrements it. rt_ref is called inside APIs (rtalloc1 and
rtrequest{,1}) so users don't need to care about it while
users must call rt_unref to an obtained rtentry after using it.
rtfree is removed and we use rt_unref and rt_free instead.
rt_unref now just decrements the counter of a given rtentry
and rt_free just tries to destroy a given rtentry.
See the next section for destructions of rtentries by rt_free.
Destructions of rtentries
-------------------------
We destroy a rtentry only when we call rtrequest{,1}(RTM_DELETE);
the original implementation can destroy in any rtfree where it's
the last reference. If we use reference counting or psref, it's
easy to understand if the place that a rtentry is destroyed is
fixed.
rt_free waits for references to a given rtentry to be released
before actually destroying the rtentry. rt_free uses a condition
variable (cv_wait) (and psref_target_destroy for psref) to wait.
Unfortunately rtrequest{,1}(RTM_DELETE) can be called in softint
that we cannot use cv_wait. In that case, we have to defer the
destruction to a workqueue.
rtentry#rt_cv, rtentry#rt_psref and global variables
(see rt_free_global) are added to conduct the procedure.
Updates of rtentries
--------------------
One difficulty to use refcnt/psref instead of rwlock for rtentry
is updates of rtentries. We need an additional mechanism to
prevent readers from seeing inconsistency of a rtentry being
updated.
We introduce RTF_UPDATING flag to rtentries that are updating.
While the flag is set to a rtentry, users cannot acquire the
rtentry. By doing so, we avoid users to see inconsistent
rtentries.
There are two options when a user tries to acquire a rtentry
with the RTF_UPDATING flag; if a user runs in softint context
the user fails to acquire a rtentry (NULL is returned).
Otherwise a user waits until the update completes by waiting
on cv.
The procedure of an updater is similar to the destruction of
a rtentry. Wait on cv (and psref) and after all readers left,
proceed with the update.
Global variables (see rt_update_global) are added to conduct
the procedure.
Currently we apply the mechanism to only RTM_CHANGE in
rtsock.c. We would have to apply other codes. See
"Known issues" section.
psref for rtentry
-----------------
When we obtain a rtentry from a rtcache via rtcache_* APIs,
psref is used to reference to the rtentry.
rtcache_ref acquires a reference to a rtentry with psref
and rtcache_unref releases the reference after using it.
rtcache_ref is called inside rtcache_* APIs and users don't
need to take care of it while users must call rtcache_unref
to release the reference.
struct psref and int bound that is needed for psref is
embedded into struct route. By doing so we don't need to
add local variables and additional argument to APIs.
However this adds another constraint to psref other than
reference counting one's; holding a reference of an rtentry
via a rtcache is allowed by just one caller at the same time.
So we must not acquire a rtentry via a rtcache twice and
avoid a recursive use of a rtcache. And also a rtcache must
be arranged to be used by a LWP/softint at the same time
somehow. For IP forwarding case, we have per-CPU rtcaches
used in softint so the constraint is guaranteed. For a
rtcache of a PCB case, the constraint is guaranteed by the
solock of each PCB. Any other cases (pf, ipf, stf and ipsec)
are currently guaranteed by only the existence of the global
locks (softnet_lock and/or KERNEL_LOCK). If we've found the
cases that we cannot guarantee the constraint, we would need
to introduce other rtcache APIs that use simple reference
counting.
psref of rtcache is created with IPL_SOFTNET and so rtcache
shouldn't be used at an IPL higher than IPL_SOFTNET.
Note that rtcache_free is used to invalidate a given rtcache.
We don't need another care by my change; just keep them as
they are.
Performance impact
------------------
When NET_MPSAFE is disabled the performance drop is 3% while
when it's enabled the drop is increased to 11%. The difference
comes from that currently we don't take any global locks and
don't use psref if NET_MPSAFE is disabled.
We can optimize the performance of the case of NET_MPSAFE
on by reducing lookups of rtcache that uses psref;
currently we do two lookups but we should be able to trim
one of two. This is a future work.
Known issues
------------
There are two known issues to be solved; one is that
a caller of rtrequest(RTM_ADD) may change rtentry (see rtinit).
We need to prevent new references during the update. Or
we may be able to remove the code (perhaps, need more
investigations).
The other is rtredirect that updates a rtentry. We need
to apply our update mechanism, however it's not easy because
rtredirect is called in softint and we cannot apply our
mechanism simply. One solution is to defer rtredirect to
a workqueue but it requires some code restructuring.
2016-12-12 06:55:57 +03:00
|
|
|
rt_unref(rt);
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
rt = NULL;
|
|
|
|
}
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
if (cmd == RTM_ADD)
|
2015-02-26 12:54:46 +03:00
|
|
|
in6_ifaddlocal(ifa);
|
2006-05-18 13:05:49 +04:00
|
|
|
else
|
2015-02-26 12:54:46 +03:00
|
|
|
in6_ifremlocal(ifa);
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
s = pserialize_read_enter();
|
|
|
|
ifa_release(ifa, &psref);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
|
|
|
curlwp_bindx(bound);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* process input packet.
|
|
|
|
* we have rearranged checks order compared to the rfc,
|
|
|
|
* but it seems more efficient this way or not possible otherwise.
|
|
|
|
*/
|
2017-02-02 05:52:10 +03:00
|
|
|
/*
 * IPv4 side of CARP input validation.
 *
 * Drops (and frees) the packet unless carp_opts[CARPCTL_ALLOW] is set,
 * the receiving interface has if_type IFT_CARP, the IP TTL equals
 * CARP_DFLTTL, the packet is long enough to hold the IP header plus a
 * struct carp_header, and carp_cksum() reports success.  A packet that
 * passes is handed to carp_proto_input_c() with AF_INET.
 *
 * hlen and proto are not referenced in this function.
 */
static void
|
|
|
|
_carp_proto_input(struct mbuf *m, int hlen, int proto)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
|
|
|
struct ip *ip = mtod(m, struct ip *);
|
|
|
|
/* sc stays NULL here; it is only passed to CARP_LOG() below. */
struct carp_softc *sc = NULL;
|
|
|
|
struct carp_header *ch;
|
2013-10-18 23:48:36 +04:00
|
|
|
int iplen, len;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *rcvif;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_IPACKETS);
|
2014-04-04 16:53:04 +04:00
|
|
|
MCLAIM(m, &carp_proto_mowner_rx);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
|
|
|
if (!carp_opts[CARPCTL_ALLOW]) {
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif = m_get_rcvif_NOMPSAFE(m);
|
2006-05-18 13:05:49 +04:00
|
|
|
/* check if received on a valid carp interface */
|
2016-06-10 16:31:43 +03:00
|
|
|
if (rcvif->if_type != IFT_CARP) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADIF);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("packet received on non-carp interface: %s",
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* verify that the IP TTL is 255. */
|
|
|
|
if (ip->ip_ttl != CARP_DFLTTL) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADTTL);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
|
2016-06-10 16:31:43 +03:00
|
|
|
CARP_DFLTTL, rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* verify that the received packet length is
|
|
|
|
* equal to the CARP header
|
|
|
|
*/
|
|
|
|
/* ip_hl counts 32-bit words; shift by 2 to get the header size in bytes. */
iplen = ip->ip_hl << 2;
|
|
|
|
len = iplen + sizeof(*ch);
|
|
|
|
/* Only a too-short packet is rejected; a longer one is accepted. */
if (len > m->m_pkthdr.len) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADLEN);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * No m_freem() on this failure path: m is already NULL.
 * NOTE(review): presumably m_pullup() frees the chain itself on
 * failure -- confirm against mbuf(9).
 */
if ((m = m_pullup(m, len)) == NULL) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_HDROPS);
|
2006-05-18 13:05:49 +04:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
/* m may have changed in m_pullup(); reload the header pointer. */
ip = mtod(m, struct ip *);
|
|
|
|
ch = (struct carp_header *)((char *)ip + iplen);
|
|
|
|
/* verify the CARP checksum */
|
|
|
|
/*
 * Temporarily advance m_data past the IP header so that carp_cksum()
 * is applied to the CARP header; the adjustment is undone below once
 * the checksum has been accepted.
 */
m->m_data += iplen;
|
|
|
|
if (carp_cksum(m, len - iplen)) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADSUM);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("checksum failed on %s",
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
m->m_data -= iplen;
|
|
|
|
|
|
|
|
|
|
carp_proto_input_c(m, ch, AF_INET);
|
|
|
|
}
|
|
|
|
|
2017-02-02 05:52:10 +03:00
|
|
|
/*
 * Entry point for received CARP packets: hands m to wqinput_input()
 * on carp_wqinput, passing 0 for both trailing arguments.  The variadic
 * arguments are never read here.
 * NOTE(review): carp_wqinput presumably ends up running
 * _carp_proto_input() -- its setup is outside this view, confirm.
 */
void
|
|
|
|
carp_proto_input(struct mbuf *m, ...)
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
wqinput_input(carp_wqinput, m, 0, 0);
|
|
|
|
}
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
#ifdef INET6
|
2017-02-02 05:52:10 +03:00
|
|
|
/*
 * IPv6 side of CARP input validation.
 *
 * Drops the packet unless carp_opts[CARPCTL_ALLOW] is set, the
 * receiving interface has if_type IFT_CARP, the hop limit equals
 * CARP_DFLTTL, a struct carp_header can be obtained at offset off, and
 * carp6_cksum() reports success.  A packet that passes is handed to
 * carp_proto_input_c() with AF_INET6.
 *
 * proto is not referenced in this function.
 */
static void
|
|
|
|
_carp6_proto_input(struct mbuf *m, int off, int proto)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
|
|
|
/* sc stays NULL here; it is only passed to CARP_LOG() below. */
struct carp_softc *sc = NULL;
|
|
|
|
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
struct carp_header *ch;
|
|
|
|
u_int len;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct ifnet *rcvif;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_IPACKETS6);
|
2014-04-04 16:53:04 +04:00
|
|
|
MCLAIM(m, &carp_proto6_mowner_rx);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
|
|
|
if (!carp_opts[CARPCTL_ALLOW]) {
|
|
|
|
m_freem(m);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif = m_get_rcvif_NOMPSAFE(m);
|
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
/* check if received on a valid carp interface */
|
2016-06-10 16:31:43 +03:00
|
|
|
if (rcvif->if_type != IFT_CARP) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADIF);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("packet received on non-carp interface: %s",
|
2016-06-10 16:31:43 +03:00
|
|
|
rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* verify that the IP TTL is 255 */
|
|
|
|
if (ip6->ip6_hlim != CARP_DFLTTL) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADTTL);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
|
2016-06-10 16:31:43 +03:00
|
|
|
CARP_DFLTTL, rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* verify that we have a complete carp packet */
|
|
|
|
/* len is saved up front and is used only in the log message below. */
len = m->m_len;
|
2018-05-18 21:58:51 +03:00
|
|
|
M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch));
|
2006-05-18 13:05:49 +04:00
|
|
|
/*
 * NOTE(review): m is not freed on this failure path; presumably
 * M_REGION_GET() disposes of the mbuf itself when it leaves ch
 * NULL -- confirm against the macro definition.
 */
if (ch == NULL) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADLEN);
|
2006-05-18 13:05:49 +04:00
|
|
|
CARP_LOG(sc, ("packet size %u too small", len));
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* verify the CARP checksum */
|
2017-02-27 11:26:53 +03:00
|
|
|
if (carp6_cksum(m, off, sizeof(*ch))) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADSUM);
|
2016-06-10 16:31:43 +03:00
|
|
|
CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
carp_proto_input_c(m, ch, AF_INET6);
|
2017-02-02 05:52:10 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Entry point for received IPv6 CARP packets: hands *mp to
 * wqinput_input() on carp6_wqinput together with *offp and proto, and
 * always returns IPPROTO_DONE to the caller.
 * NOTE(review): carp6_wqinput presumably ends up running
 * _carp6_proto_input() -- its setup is outside this view, confirm.
 */
int
|
|
|
|
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
wqinput_input(carp6_wqinput, *mp, *offp, proto);
|
|
|
|
|
|
|
|
|
|
return IPPROTO_DONE;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
/*
 * Address-family independent processing of a validated CARP
 * advertisement.
 *
 * m  - the received packet; it is freed on every path out of here.
 * ch - pointer to the CARP header of the packet.
 * af - AF_INET or AF_INET6, as passed by the per-family input routine.
 *
 * Steps: find the carp_softc with a matching vhid, discard what looks
 * like our own advertisement coming back on a non-simplex parent,
 * update the interface counters, check the CARP version and the HMAC,
 * record the sender's counter, and finally run the MASTER/BACKUP
 * decision by comparing our advertisement interval (sc_tv) with the
 * sender's (ch_tv).
 */
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct carp_softc *sc;
|
|
|
|
u_int64_t tmp_counter;
|
|
|
|
struct timeval sc_tv, ch_tv;
|
|
|
|
|
|
|
|
|
|
/*
 * Walk the vhif_vrs list hanging off the receiving interface's
 * if_carpdev->if_carp and stop at the softc whose vhid matches the
 * header.  sc is tested for NULL right after the loop, which covers
 * the no-match case.
 */
TAILQ_FOREACH(sc, &((struct carp_if *)
|
2016-06-10 16:31:43 +03:00
|
|
|
m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_vhid == ch->carp_vhid)
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* Unknown vhid, or our carp interface is not both up and running. */
if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
|
|
|
|
(IFF_UP|IFF_RUNNING)) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADVHID);
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
2006-05-24 17:39:37 +04:00
|
|
|
/*
|
|
|
|
* Check if our own advertisement was duplicated
|
|
|
|
* from a non simplex interface.
|
|
|
|
* XXX If there is no address on our physical interface
|
|
|
|
* there is no way to distinguish our ads from the ones
|
|
|
|
* another carp host might have sent us.
|
|
|
|
*/
|
|
|
|
if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
|
|
|
|
struct sockaddr sa;
|
|
|
|
struct ifaddr *ifa;
|
2016-08-01 06:15:30 +03:00
|
|
|
int s;
|
2006-05-24 17:39:37 +04:00
|
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
/* sa carries nothing but the address family; the rest is zeroed. */
memset(&sa, 0, sizeof(sa));
|
2006-05-24 17:39:37 +04:00
|
|
|
sa.sa_family = af;
|
2016-08-01 06:15:30 +03:00
|
|
|
/*
 * ifa is used only inside this pserialize read section; every
 * way out of the block below calls pserialize_read_exit(s) first.
 */
s = pserialize_read_enter();
|
2006-05-24 17:39:37 +04:00
|
|
|
ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
|
|
|
|
|
|
|
|
|
|
if (ifa && af == AF_INET) {
|
|
|
|
struct ip *ip = mtod(m, struct ip *);
|
|
|
|
/* Source address equals the carpdev's address: drop it. */
if (ip->ip_src.s_addr ==
|
|
|
|
ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-24 17:39:37 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
if (ifa && af == AF_INET6) {
|
|
|
|
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
struct in6_addr in6_src, in6_found;
|
|
|
|
|
|
|
|
|
|
/*
 * Compare on local copies.  For link-local addresses the
 * second 16-bit word is cleared in both copies before the
 * comparison (NOTE(review): presumably an embedded scope id
 * -- confirm).
 */
in6_src = ip6->ip6_src;
|
|
|
|
in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
|
|
|
|
if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
|
|
|
|
in6_src.s6_addr16[1] = 0;
|
|
|
|
if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
|
|
|
|
in6_found.s6_addr16[1] = 0;
|
|
|
|
if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-24 17:39:37 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-24 17:39:37 +04:00
|
|
|
}
|
|
|
|
|
|
2009-01-11 05:45:45 +03:00
|
|
|
/* Account the packet on the carp interface before further checks. */
nanotime(&sc->sc_if.if_lastchange);
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_if.if_ipackets++;
|
|
|
|
sc->sc_if.if_ibytes += m->m_pkthdr.len;
|
|
|
|
|
|
|
|
|
|
/* verify the CARP version. */
|
|
|
|
if (ch->carp_version != CARP_VERSION) {
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADVER);
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_if.if_ierrors++;
|
|
|
|
CARP_LOG(sc, ("invalid version %d != %d",
|
|
|
|
ch->carp_version, CARP_VERSION));
|
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* verify the hash */
|
|
|
|
/* A non-zero return from carp_hmac_verify() is treated as failure. */
if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
|
2016-07-23 16:37:10 +03:00
|
|
|
struct ip *ip;
|
2017-04-19 06:23:06 +03:00
|
|
|
char ipbuf[INET_ADDRSTRLEN];
|
|
|
|
#ifdef INET6
|
2016-07-23 16:37:10 +03:00
|
|
|
struct ip6_hdr *ip6;
|
2017-01-16 10:33:36 +03:00
|
|
|
char ip6buf[INET6_ADDRSTRLEN];
|
2017-04-19 06:23:06 +03:00
|
|
|
#endif
|
2016-07-23 16:37:10 +03:00
|
|
|
|
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_BADAUTH);
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_if.if_ierrors++;
|
2016-07-23 16:37:10 +03:00
|
|
|
|
|
|
|
|
|
/* Log the sender's address in the format of its family. */
switch(af) {
|
|
|
|
case AF_INET:
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
CARP_LOG(sc, ("incorrect hash from %s",
|
2017-05-12 20:53:53 +03:00
|
|
|
IN_PRINT(ipbuf, &ip->ip_src)));
|
2016-07-23 16:37:10 +03:00
|
|
|
break;
|
|
|
|
|
|
2017-04-19 06:23:06 +03:00
|
|
|
#ifdef INET6
|
2016-07-23 16:37:10 +03:00
|
|
|
case AF_INET6:
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
CARP_LOG(sc, ("incorrect hash from %s",
|
2017-01-16 18:44:46 +03:00
|
|
|
IN6_PRINT(ip6buf, &ip6->ip6_src)));
|
2016-07-23 16:37:10 +03:00
|
|
|
break;
|
2017-04-19 06:23:06 +03:00
|
|
|
#endif
|
2016-07-23 16:37:10 +03:00
|
|
|
|
|
|
|
|
|
default: CARP_LOG(sc, ("incorrect hash"));
|
|
|
|
break;
|
|
|
|
}
|
2006-05-18 13:05:49 +04:00
|
|
|
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Rebuild the 64-bit counter from its two 32-bit halves, each
 * converted with ntohl(); element 0 is the high half.
 */
tmp_counter = ntohl(ch->carp_counter[0]);
|
|
|
|
tmp_counter = tmp_counter<<32;
|
|
|
|
tmp_counter += ntohl(ch->carp_counter[1]);
|
|
|
|
|
|
|
|
|
|
/* XXX Replay protection goes here */
|
|
|
|
|
|
|
|
|
|
sc->sc_init_counter = 0;
|
|
|
|
sc->sc_counter = tmp_counter;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Express both advertisement intervals as timevals: advbase whole
 * seconds plus advskew/256 of a second.  When carp_suppress_preempt
 * is set and our skew is below 240, 240 is used for our side instead.
 */
sc_tv.tv_sec = sc->sc_advbase;
|
|
|
|
if (carp_suppress_preempt && sc->sc_advskew < 240)
|
|
|
|
sc_tv.tv_usec = 240 * 1000000 / 256;
|
|
|
|
else
|
|
|
|
sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
|
|
|
|
ch_tv.tv_sec = ch->carp_advbase;
|
|
|
|
ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
|
|
|
|
|
|
|
|
|
|
switch (sc->sc_state) {
|
|
|
|
case INIT:
|
|
|
|
break;
|
|
|
|
case MASTER:
|
|
|
|
/*
|
|
|
|
* If we receive an advertisement from a backup who's going to
|
|
|
|
* be more frequent than us, go into BACKUP state.
|
|
|
|
*/
|
|
|
|
/* The two tests together amount to sc_tv >= ch_tv. */
if (timercmp(&sc_tv, &ch_tv, >) ||
|
|
|
|
timercmp(&sc_tv, &ch_tv, ==)) {
|
|
|
|
callout_stop(&sc->sc_ad_tmo);
|
2009-06-07 10:11:18 +04:00
|
|
|
CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_state(sc, BACKUP);
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
carp_setroute(sc, RTM_DELETE);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case BACKUP:
|
|
|
|
/*
|
|
|
|
* If we're pre-empting masters who advertise slower than us,
|
|
|
|
* and this one claims to be slower, treat him as down.
|
|
|
|
*/
|
|
|
|
if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
|
2009-06-07 10:11:18 +04:00
|
|
|
CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_master_down(sc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the master is going to advertise at such a low frequency
|
|
|
|
* that he's guaranteed to time out, we'd might as well just
|
|
|
|
* treat him as timed out now.
|
|
|
|
*/
|
|
|
|
/* Only tv_sec is rewritten; tv_usec keeps the value set above. */
sc_tv.tv_sec = sc->sc_advbase * 3;
|
|
|
|
if (timercmp(&sc_tv, &ch_tv, <)) {
|
2009-06-07 10:11:18 +04:00
|
|
|
CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_master_down(sc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Otherwise, we reset the counter and wait for the next
|
|
|
|
* advertisement.
|
|
|
|
*/
|
|
|
|
carp_setrun(sc, af);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Common exit: the advertisement has been consumed. */
m_freem(m);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Interface side of the CARP implementation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* ARGSUSED */
|
|
|
|
/*
 * Attach the carp interface cloner and allocate the per-CPU statistics
 * storage, sized for CARP_NSTATS uint64_t counters.
 * The argument n is not used.
 */
void
|
2006-11-16 04:32:37 +03:00
|
|
|
carpattach(int n)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
|
|
|
if_clone_attach(&carp_cloner);
|
2008-04-15 10:03:28 +04:00
|
|
|
|
|
|
|
|
|
carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_clone_create(struct if_clone *ifc, int unit)
|
|
|
|
{
|
|
|
|
extern int ifqmaxlen;
|
|
|
|
struct carp_softc *sc;
|
|
|
|
struct ifnet *ifp;
|
2017-10-23 12:31:17 +03:00
|
|
|
int rv;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2008-12-19 21:49:37 +03:00
|
|
|
sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (!sc)
|
|
|
|
return (ENOMEM);
|
|
|
|
|
|
|
|
sc->sc_suppress = 0;
|
|
|
|
sc->sc_advbase = CARP_DFLTINTV;
|
|
|
|
sc->sc_vhid = -1; /* required setting */
|
|
|
|
sc->sc_advskew = 0;
|
|
|
|
sc->sc_init_counter = 1;
|
|
|
|
sc->sc_naddrs = sc->sc_naddrs6 = 0;
|
|
|
|
#ifdef INET6
|
|
|
|
sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
2007-07-10 00:51:58 +04:00
|
|
|
callout_init(&sc->sc_ad_tmo, 0);
|
|
|
|
callout_init(&sc->sc_md_tmo, 0);
|
|
|
|
callout_init(&sc->sc_md6_tmo, 0);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
|
|
|
|
callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
|
|
|
|
callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
|
|
|
|
|
|
|
|
LIST_INIT(&sc->carp_mc_listhead);
|
|
|
|
ifp = &sc->sc_if;
|
|
|
|
ifp->if_softc = sc;
|
|
|
|
snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
|
|
|
|
unit);
|
|
|
|
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
|
|
|
|
ifp->if_ioctl = carp_ioctl;
|
|
|
|
ifp->if_start = carp_start;
|
|
|
|
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
|
|
|
|
IFQ_SET_READY(&ifp->if_snd);
|
2017-10-23 12:31:17 +03:00
|
|
|
rv = if_initialize(ifp);
|
|
|
|
if (rv != 0) {
|
|
|
|
callout_destroy(&sc->sc_ad_tmo);
|
|
|
|
callout_destroy(&sc->sc_md_tmo);
|
|
|
|
callout_destroy(&sc->sc_md6_tmo);
|
|
|
|
free(ifp->if_softc, M_DEVBUF);
|
|
|
|
|
|
|
|
return rv;
|
|
|
|
}
|
2016-12-28 10:26:24 +03:00
|
|
|
ether_ifattach(ifp, NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_enaddr(sc);
|
2016-12-28 10:26:24 +03:00
|
|
|
/* Overwrite ethernet defaults */
|
|
|
|
ifp->if_type = IFT_CARP;
|
|
|
|
ifp->if_output = carp_output;
|
|
|
|
if_register(ifp);
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_clone_destroy(struct ifnet *ifp)
|
|
|
|
{
|
2007-07-10 00:51:58 +04:00
|
|
|
struct carp_softc *sc = ifp->if_softc;
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
carpdetach(ifp->if_softc);
|
|
|
|
ether_ifdetach(ifp);
|
|
|
|
if_detach(ifp);
|
2007-07-10 00:51:58 +04:00
|
|
|
callout_destroy(&sc->sc_ad_tmo);
|
|
|
|
callout_destroy(&sc->sc_md_tmo);
|
|
|
|
callout_destroy(&sc->sc_md6_tmo);
|
2006-05-18 13:05:49 +04:00
|
|
|
free(ifp->if_softc, M_DEVBUF);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carpdetach(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct carp_if *cif;
|
|
|
|
int s;
|
|
|
|
|
|
|
|
callout_stop(&sc->sc_ad_tmo);
|
|
|
|
callout_stop(&sc->sc_md_tmo);
|
|
|
|
callout_stop(&sc->sc_md6_tmo);
|
|
|
|
|
|
|
|
if (sc->sc_suppress)
|
|
|
|
carp_suppress_preempt--;
|
|
|
|
sc->sc_suppress = 0;
|
|
|
|
|
|
|
|
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
|
|
|
|
carp_suppress_preempt--;
|
|
|
|
sc->sc_sendad_errors = 0;
|
|
|
|
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
sc->sc_if.if_flags &= ~IFF_UP;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
carp_multicast_cleanup(sc);
|
|
|
|
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_LOCK(1, NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
s = splnet();
|
|
|
|
if (sc->sc_carpdev != NULL) {
|
|
|
|
/* XXX linkstatehook removal */
|
|
|
|
cif = (struct carp_if *)sc->sc_carpdev->if_carp;
|
|
|
|
TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
|
|
|
|
if (!--cif->vhif_nvrs) {
|
|
|
|
ifpromisc(sc->sc_carpdev, 0);
|
|
|
|
sc->sc_carpdev->if_carp = NULL;
|
2008-12-17 23:51:31 +03:00
|
|
|
free(cif, M_IFADDR);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
sc->sc_carpdev = NULL;
|
|
|
|
splx(s);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Detach an interface from the carp. */
|
|
|
|
void
|
|
|
|
carp_ifdetach(struct ifnet *ifp)
|
|
|
|
{
|
|
|
|
struct carp_softc *sc, *nextsc;
|
|
|
|
struct carp_if *cif = (struct carp_if *)ifp->if_carp;
|
|
|
|
|
|
|
|
for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
|
|
|
|
nextsc = TAILQ_NEXT(sc, sc_list);
|
|
|
|
carpdetach(sc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-03-21 18:33:25 +03:00
|
|
|
static void
|
2006-11-16 04:32:37 +03:00
|
|
|
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
|
2006-10-12 05:30:41 +04:00
|
|
|
struct carp_header *ch)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
|
|
|
if (sc->sc_init_counter) {
|
|
|
|
/* this could also be seconds since unix epoch */
|
First step of random number subsystem rework described in
<20111022023242.BA26F14A158@mail.netbsd.org>. This change includes
the following:
An initial cleanup and minor reorganization of the entropy pool
code in sys/dev/rnd.c and sys/dev/rndpool.c. Several bugs are
fixed. Some effort is made to accumulate entropy more quickly at
boot time.
A generic interface, "rndsink", is added, for stream generators to
request that they be re-keyed with good quality entropy from the pool
as soon as it is available.
The arc4random()/arc4randbytes() implementation in libkern is
adjusted to use the rndsink interface for rekeying, which helps
address the problem of low-quality keys at boot time.
An implementation of the FIPS 140-2 statistical tests for random
number generator quality is provided (libkern/rngtest.c). This
is based on Greg Rose's implementation from Qualcomm.
A new random stream generator, nist_ctr_drbg, is provided. It is
based on an implementation of the NIST SP800-90 CTR_DRBG by
Henric Jungheim. This generator users AES in a modified counter
mode to generate a backtracking-resistant random stream.
An abstraction layer, "cprng", is provided for in-kernel consumers
of randomness. The arc4random/arc4randbytes API is deprecated for
in-kernel use. It is replaced by "cprng_strong". The current
cprng_fast implementation wraps the existing arc4random
implementation. The current cprng_strong implementation wraps the
new CTR_DRBG implementation. Both interfaces are rekeyed from
the entropy pool automatically at intervals justifiable from best
current cryptographic practice.
In some quick tests, cprng_fast() is about the same speed as
the old arc4randbytes(), and cprng_strong() is about 20% faster
than rnd_extract_data(). Performance is expected to improve.
The AES code in src/crypto/rijndael is no longer an optional
kernel component, as it is required by cprng_strong, which is
not an optional kernel component.
The entropy pool output is subjected to the rngtest tests at
startup time; if it fails, the system will reboot. There is
approximately a 3/10000 chance of a false positive from these
tests. Entropy pool _input_ from hardware random numbers is
subjected to the rngtest tests at attach time, as well as the
FIPS continuous-output test, to detect bad or stuck hardware
RNGs; if any are detected, they are detached, but the system
continues to run.
A problem with rndctl(8) is fixed -- datastructures with
pointers in arrays are no longer passed to userspace (this
was not a security problem, but rather a major issue for
compat32). A new kernel will require a new rndctl.
The sysctl kern.arandom() and kern.urandom() nodes are hooked
up to the new generators, but the /dev/*random pseudodevices
are not, yet.
Manual pages for the new kernel interfaces are forthcoming.
2011-11-20 02:51:18 +04:00
|
|
|
sc->sc_counter = cprng_fast64();
|
2006-05-18 13:05:49 +04:00
|
|
|
} else
|
|
|
|
sc->sc_counter++;
|
|
|
|
|
|
|
|
ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
|
|
|
|
ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
|
|
|
|
|
|
|
|
carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_send_ad_all(void)
|
|
|
|
{
|
|
|
|
struct ifnet *ifp;
|
|
|
|
struct carp_if *cif;
|
|
|
|
struct carp_softc *vh;
|
2016-05-12 05:24:16 +03:00
|
|
|
int s;
|
2016-06-16 05:38:40 +03:00
|
|
|
int bound = curlwp_bind();
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2016-05-12 05:24:16 +03:00
|
|
|
s = pserialize_read_enter();
|
|
|
|
IFNET_READER_FOREACH(ifp) {
|
|
|
|
struct psref psref;
|
2006-05-18 13:05:49 +04:00
|
|
|
if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
|
|
|
|
continue;
|
|
|
|
|
2017-03-14 12:03:08 +03:00
|
|
|
if_acquire(ifp, &psref);
|
2016-05-12 05:24:16 +03:00
|
|
|
pserialize_read_exit(s);
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
cif = (struct carp_if *)ifp->if_carp;
|
|
|
|
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
|
|
|
|
if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
|
|
|
|
(IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
|
|
|
|
carp_send_ad(vh);
|
|
|
|
}
|
2016-05-12 05:24:16 +03:00
|
|
|
|
|
|
|
s = pserialize_read_enter();
|
2017-03-14 12:03:08 +03:00
|
|
|
if_release(ifp, &psref);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
2016-05-12 05:24:16 +03:00
|
|
|
pserialize_read_exit(s);
|
2016-06-16 05:38:40 +03:00
|
|
|
curlwp_bindx(bound);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_send_ad(void *v)
|
|
|
|
{
|
|
|
|
struct carp_header ch;
|
|
|
|
struct timeval tv;
|
|
|
|
struct carp_softc *sc = v;
|
|
|
|
struct carp_header *ch_ptr;
|
|
|
|
struct mbuf *m;
|
|
|
|
int error, len, advbase, advskew, s;
|
|
|
|
struct sockaddr sa;
|
|
|
|
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_LOCK(1, NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
s = splsoftnet();
|
|
|
|
|
|
|
|
advbase = advskew = 0; /* Sssssh compiler */
|
|
|
|
if (sc->sc_carpdev == NULL) {
|
|
|
|
sc->sc_if.if_oerrors++;
|
|
|
|
goto retry_later;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* bow out if we've gone to backup (the carp interface is going down) */
|
|
|
|
if (sc->sc_bow_out) {
|
|
|
|
sc->sc_bow_out = 0;
|
|
|
|
advbase = 255;
|
|
|
|
advskew = 255;
|
|
|
|
} else {
|
|
|
|
advbase = sc->sc_advbase;
|
|
|
|
if (!carp_suppress_preempt || sc->sc_advskew > 240)
|
|
|
|
advskew = sc->sc_advskew;
|
|
|
|
else
|
|
|
|
advskew = 240;
|
|
|
|
tv.tv_sec = advbase;
|
|
|
|
tv.tv_usec = advskew * 1000000 / 256;
|
|
|
|
}
|
|
|
|
|
|
|
|
ch.carp_version = CARP_VERSION;
|
|
|
|
ch.carp_type = CARP_ADVERTISEMENT;
|
|
|
|
ch.carp_vhid = sc->sc_vhid;
|
|
|
|
ch.carp_advbase = advbase;
|
|
|
|
ch.carp_advskew = advskew;
|
|
|
|
ch.carp_authlen = 7; /* XXX DEFINE */
|
|
|
|
ch.carp_pad1 = 0; /* must be zero */
|
|
|
|
ch.carp_cksum = 0;
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef INET
|
|
|
|
if (sc->sc_naddrs) {
|
|
|
|
struct ip *ip;
|
2016-08-01 06:15:30 +03:00
|
|
|
struct ifaddr *ifa;
|
|
|
|
int _s;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
MGETHDR(m, M_DONTWAIT, MT_HEADER);
|
|
|
|
if (m == NULL) {
|
|
|
|
sc->sc_if.if_oerrors++;
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_ONOMEM);
|
2006-05-18 13:05:49 +04:00
|
|
|
/* XXX maybe less ? */
|
|
|
|
goto retry_later;
|
|
|
|
}
|
2014-04-04 16:53:04 +04:00
|
|
|
MCLAIM(m, &carp_proto_mowner_tx);
|
2006-05-18 13:05:49 +04:00
|
|
|
len = sizeof(*ip) + sizeof(ch);
|
|
|
|
m->m_pkthdr.len = len;
|
2016-06-10 16:27:10 +03:00
|
|
|
m_reset_rcvif(m);
|
2006-05-18 13:05:49 +04:00
|
|
|
m->m_len = len;
|
|
|
|
MH_ALIGN(m, m->m_len);
|
|
|
|
m->m_flags |= M_MCAST;
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
ip->ip_v = IPVERSION;
|
|
|
|
ip->ip_hl = sizeof(*ip) >> 2;
|
|
|
|
ip->ip_tos = IPTOS_LOWDELAY;
|
|
|
|
ip->ip_len = htons(len);
|
2007-12-21 05:07:54 +03:00
|
|
|
ip->ip_id = 0; /* no need for id, we don't support fragments */
|
2006-05-18 13:05:49 +04:00
|
|
|
ip->ip_off = htons(IP_DF);
|
|
|
|
ip->ip_ttl = CARP_DFLTTL;
|
|
|
|
ip->ip_p = IPPROTO_CARP;
|
|
|
|
ip->ip_sum = 0;
|
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&sa, 0, sizeof(sa));
|
2006-05-18 13:05:49 +04:00
|
|
|
sa.sa_family = AF_INET;
|
2016-08-01 06:15:30 +03:00
|
|
|
_s = pserialize_read_enter();
|
2006-05-18 13:05:49 +04:00
|
|
|
ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
|
|
|
|
if (ifa == NULL)
|
|
|
|
ip->ip_src.s_addr = 0;
|
|
|
|
else
|
|
|
|
ip->ip_src.s_addr =
|
|
|
|
ifatoia(ifa)->ia_addr.sin_addr.s_addr;
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(_s);
|
2006-05-18 13:05:49 +04:00
|
|
|
ip->ip_dst.s_addr = INADDR_CARP_GROUP;
|
|
|
|
|
|
|
|
ch_ptr = (struct carp_header *)(&ip[1]);
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(ch_ptr, &ch, sizeof(ch));
|
2018-03-21 18:33:25 +03:00
|
|
|
carp_prepare_ad(m, sc, ch_ptr);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
m->m_data += sizeof(*ip);
|
|
|
|
ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
|
|
|
|
m->m_data -= sizeof(*ip);
|
|
|
|
|
2009-01-11 05:45:45 +03:00
|
|
|
nanotime(&sc->sc_if.if_lastchange);
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_if.if_opackets++;
|
|
|
|
sc->sc_if.if_obytes += len;
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_OPACKETS);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
|
|
|
|
NULL);
|
|
|
|
if (error) {
|
|
|
|
if (error == ENOBUFS)
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_ONOMEM);
|
2006-05-18 13:05:49 +04:00
|
|
|
else
|
|
|
|
CARP_LOG(sc, ("ip_output failed: %d", error));
|
|
|
|
sc->sc_if.if_oerrors++;
|
|
|
|
if (sc->sc_sendad_errors < INT_MAX)
|
|
|
|
sc->sc_sendad_errors++;
|
|
|
|
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
|
|
|
|
carp_suppress_preempt++;
|
|
|
|
if (carp_suppress_preempt == 1)
|
|
|
|
carp_send_ad_all();
|
|
|
|
}
|
|
|
|
sc->sc_sendad_success = 0;
|
|
|
|
} else {
|
|
|
|
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
|
|
|
|
if (++sc->sc_sendad_success >=
|
|
|
|
CARP_SENDAD_MIN_SUCCESS) {
|
|
|
|
carp_suppress_preempt--;
|
|
|
|
sc->sc_sendad_errors = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
sc->sc_sendad_errors = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* INET */
|
2017-02-27 11:26:53 +03:00
|
|
|
#ifdef INET6
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_naddrs6) {
|
|
|
|
struct ip6_hdr *ip6;
|
2016-08-01 06:15:30 +03:00
|
|
|
struct ifaddr *ifa;
|
|
|
|
int _s;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
MGETHDR(m, M_DONTWAIT, MT_HEADER);
|
|
|
|
if (m == NULL) {
|
|
|
|
sc->sc_if.if_oerrors++;
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_ONOMEM);
|
2006-05-18 13:05:49 +04:00
|
|
|
/* XXX maybe less ? */
|
|
|
|
goto retry_later;
|
|
|
|
}
|
2014-04-04 16:53:04 +04:00
|
|
|
MCLAIM(m, &carp_proto6_mowner_tx);
|
2006-05-18 13:05:49 +04:00
|
|
|
len = sizeof(*ip6) + sizeof(ch);
|
|
|
|
m->m_pkthdr.len = len;
|
2016-06-10 16:27:10 +03:00
|
|
|
m_reset_rcvif(m);
|
2006-05-18 13:05:49 +04:00
|
|
|
m->m_len = len;
|
|
|
|
MH_ALIGN(m, m->m_len);
|
|
|
|
m->m_flags |= M_MCAST;
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(ip6, 0, sizeof(*ip6));
|
2006-05-18 13:05:49 +04:00
|
|
|
ip6->ip6_vfc |= IPV6_VERSION;
|
|
|
|
ip6->ip6_hlim = CARP_DFLTTL;
|
|
|
|
ip6->ip6_nxt = IPPROTO_CARP;
|
|
|
|
|
|
|
|
/* set the source address */
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&sa, 0, sizeof(sa));
|
2006-05-18 13:05:49 +04:00
|
|
|
sa.sa_family = AF_INET6;
|
2016-08-01 06:15:30 +03:00
|
|
|
_s = pserialize_read_enter();
|
2006-05-18 13:05:49 +04:00
|
|
|
ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
|
|
|
|
if (ifa == NULL) /* This should never happen with IPv6 */
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
|
2006-05-18 13:05:49 +04:00
|
|
|
else
|
|
|
|
bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
|
|
|
|
&ip6->ip6_src, sizeof(struct in6_addr));
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(_s);
|
2006-05-18 13:05:49 +04:00
|
|
|
/* set the multicast destination */
|
|
|
|
|
2008-03-15 19:44:03 +03:00
|
|
|
ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
|
2006-05-18 13:05:49 +04:00
|
|
|
ip6->ip6_dst.s6_addr8[15] = 0x12;
|
2017-02-27 11:26:53 +03:00
|
|
|
if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
|
2008-03-15 19:44:03 +03:00
|
|
|
sc->sc_if.if_oerrors++;
|
|
|
|
m_freem(m);
|
|
|
|
CARP_LOG(sc, ("in6_setscope failed"));
|
|
|
|
goto retry_later;
|
|
|
|
}
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
ch_ptr = (struct carp_header *)(&ip6[1]);
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(ch_ptr, &ch, sizeof(ch));
|
2018-03-21 18:33:25 +03:00
|
|
|
carp_prepare_ad(m, sc, ch_ptr);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2017-02-27 11:26:53 +03:00
|
|
|
ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
|
|
|
|
len - sizeof(*ip6));
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2009-01-11 05:45:45 +03:00
|
|
|
nanotime(&sc->sc_if.if_lastchange);
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_if.if_opackets++;
|
|
|
|
sc->sc_if.if_obytes += len;
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_OPACKETS6);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
|
|
|
|
if (error) {
|
|
|
|
if (error == ENOBUFS)
|
2008-04-15 10:03:28 +04:00
|
|
|
CARP_STATINC(CARP_STAT_ONOMEM);
|
2006-05-18 13:05:49 +04:00
|
|
|
else
|
|
|
|
CARP_LOG(sc, ("ip6_output failed: %d", error));
|
|
|
|
sc->sc_if.if_oerrors++;
|
|
|
|
if (sc->sc_sendad_errors < INT_MAX)
|
|
|
|
sc->sc_sendad_errors++;
|
|
|
|
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
|
|
|
|
carp_suppress_preempt++;
|
|
|
|
if (carp_suppress_preempt == 1)
|
|
|
|
carp_send_ad_all();
|
|
|
|
}
|
|
|
|
sc->sc_sendad_success = 0;
|
|
|
|
} else {
|
|
|
|
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
|
|
|
|
if (++sc->sc_sendad_success >=
|
|
|
|
CARP_SENDAD_MIN_SUCCESS) {
|
|
|
|
carp_suppress_preempt--;
|
|
|
|
sc->sc_sendad_errors = 0;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
sc->sc_sendad_errors = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
retry_later:
|
|
|
|
splx(s);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (advbase != 255 || advskew != 255)
|
|
|
|
callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Broadcast a gratuitous ARP request containing
|
|
|
|
* the virtual router MAC address for each IP address
|
|
|
|
* associated with the virtual router.
|
|
|
|
*/
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_send_arp(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct ifaddr *ifa;
|
2017-11-22 10:40:45 +03:00
|
|
|
int s, bound;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_LOCK(1, NULL);
|
2017-11-22 10:40:45 +03:00
|
|
|
bound = curlwp_bind();
|
|
|
|
s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
|
2017-11-22 10:40:45 +03:00
|
|
|
struct psref psref;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (ifa->ifa_addr->sa_family != AF_INET)
|
|
|
|
continue;
|
|
|
|
|
2017-11-22 10:40:45 +03:00
|
|
|
ifa_acquire(ifa, &psref);
|
|
|
|
pserialize_read_exit(s);
|
|
|
|
|
2016-10-11 15:32:30 +03:00
|
|
|
arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
|
2017-11-22 10:40:45 +03:00
|
|
|
|
|
|
|
s = pserialize_read_enter();
|
|
|
|
ifa_release(ifa, &psref);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
|
|
|
curlwp_bindx(bound);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef INET6
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_send_na(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct ifaddr *ifa;
|
|
|
|
struct in6_addr *in6;
|
|
|
|
static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
|
2017-11-22 10:40:45 +03:00
|
|
|
int s, bound;
|
2014-05-13 23:36:16 +04:00
|
|
|
|
|
|
|
KERNEL_LOCK(1, NULL);
|
2017-11-22 10:40:45 +03:00
|
|
|
bound = curlwp_bind();
|
|
|
|
s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
|
2017-11-22 10:40:45 +03:00
|
|
|
struct psref psref;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (ifa->ifa_addr->sa_family != AF_INET6)
|
|
|
|
continue;
|
|
|
|
|
2017-11-22 10:40:45 +03:00
|
|
|
ifa_acquire(ifa, &psref);
|
|
|
|
pserialize_read_exit(s);
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
|
|
|
|
nd6_na_output(sc->sc_carpdev, &mcast, in6,
|
|
|
|
ND_NA_FLAG_OVERRIDE, 1, NULL);
|
2017-11-22 10:40:45 +03:00
|
|
|
|
|
|
|
s = pserialize_read_enter();
|
|
|
|
ifa_release(ifa, &psref);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
|
|
|
curlwp_bindx(bound);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
/*
 * Based on bridge_hash() in if_bridge.c
 *
 * Three rounds of subtract/xor/shift mixing over the three words a, b
 * and c.  All three arguments are modified in place and are evaluated
 * many times, so they must be plain lvalues without side effects.
 */
#define	mix(a, b, c)							\
do {									\
	/* round 1 */							\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	/* round 2 */							\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	/* round 3 */							\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static u_int32_t
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_hash(struct carp_softc *sc, u_char *src)
|
|
|
|
{
|
|
|
|
u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
|
|
|
|
|
|
|
|
c += sc->sc_key[3] << 24;
|
|
|
|
c += sc->sc_key[2] << 16;
|
|
|
|
c += sc->sc_key[1] << 8;
|
|
|
|
c += sc->sc_key[0];
|
|
|
|
b += src[5] << 8;
|
|
|
|
b += src[4];
|
|
|
|
a += src[3] << 24;
|
|
|
|
a += src[2] << 16;
|
|
|
|
a += src[1] << 8;
|
|
|
|
a += src[0];
|
|
|
|
|
|
|
|
mix(a, b, c);
|
|
|
|
return (c);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
|
|
|
|
{
|
|
|
|
struct carp_softc *vh;
|
|
|
|
struct ifaddr *ifa;
|
|
|
|
int count = 0;
|
|
|
|
|
|
|
|
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
|
|
|
|
if ((type == CARP_COUNT_RUNNING &&
|
|
|
|
(vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
|
|
|
|
(IFF_UP|IFF_RUNNING)) ||
|
|
|
|
(type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
|
2017-11-22 10:40:45 +03:00
|
|
|
int s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
|
2006-05-18 13:05:49 +04:00
|
|
|
if (ifa->ifa_addr->sa_family == AF_INET &&
|
|
|
|
ia->ia_addr.sin_addr.s_addr ==
|
|
|
|
ifatoia(ifa)->ia_addr.sin_addr.s_addr)
|
|
|
|
count++;
|
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return (count);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
carp_iamatch(struct in_ifaddr *ia, u_char *src,
|
|
|
|
u_int32_t *count, u_int32_t index)
|
|
|
|
{
|
|
|
|
struct carp_softc *sc = ia->ia_ifp->if_softc;
|
|
|
|
|
|
|
|
if (carp_opts[CARPCTL_ARPBALANCE]) {
|
|
|
|
/*
|
|
|
|
* We use the source ip to decide which virtual host should
|
|
|
|
* handle the request. If we're master of that virtual host,
|
|
|
|
* then we respond, otherwise, just drop the arp packet on
|
|
|
|
* the floor.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Count the elegible carp interfaces with this address */
|
|
|
|
if (*count == 0)
|
|
|
|
*count = carp_addrcount(
|
|
|
|
(struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
|
|
|
|
ia, CARP_COUNT_RUNNING);
|
|
|
|
|
|
|
|
/* This should never happen, but... */
|
|
|
|
if (*count == 0)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (carp_hash(sc, src) % *count == index - 1 &&
|
|
|
|
sc->sc_state == MASTER) {
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (sc->sc_state == MASTER)
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
struct ifaddr *
|
|
|
|
carp_iamatch6(void *v, struct in6_addr *taddr)
|
|
|
|
{
|
|
|
|
struct carp_if *cif = v;
|
|
|
|
struct carp_softc *vh;
|
|
|
|
struct ifaddr *ifa;
|
|
|
|
|
|
|
|
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
|
2017-11-22 10:40:45 +03:00
|
|
|
int s = pserialize_read_enter();
|
2016-07-07 12:32:01 +03:00
|
|
|
IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
|
2006-05-18 13:05:49 +04:00
|
|
|
if (IN6_ARE_ADDR_EQUAL(taddr,
|
|
|
|
&ifatoia6(ifa)->ia_addr.sin6_addr) &&
|
|
|
|
((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
|
|
|
|
(IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
|
|
|
|
return (ifa);
|
|
|
|
}
|
2017-11-22 10:40:45 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
|
|
|
struct ifnet *
|
|
|
|
carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
|
|
|
|
{
|
|
|
|
struct carp_if *cif = (struct carp_if *)v;
|
|
|
|
struct carp_softc *vh;
|
|
|
|
u_int8_t *ena;
|
|
|
|
|
|
|
|
if (src)
|
|
|
|
ena = (u_int8_t *)&eh->ether_shost;
|
|
|
|
else
|
|
|
|
ena = (u_int8_t *)&eh->ether_dhost;
|
|
|
|
|
|
|
|
switch (iftype) {
|
|
|
|
case IFT_ETHER:
|
|
|
|
case IFT_FDDI:
|
|
|
|
if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
|
|
|
|
return (NULL);
|
|
|
|
break;
|
|
|
|
case IFT_ISO88025:
|
|
|
|
if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
|
|
|
|
return (NULL);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (NULL);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
|
|
|
|
if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
|
|
|
|
(IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
|
2009-03-18 18:14:29 +03:00
|
|
|
!memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
|
2006-05-18 13:05:49 +04:00
|
|
|
ETHER_ADDR_LEN)) {
|
|
|
|
return (&vh->sc_if);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
|
|
|
|
{
|
|
|
|
struct ether_header eh;
|
2016-06-10 16:31:43 +03:00
|
|
|
struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
|
2006-05-18 13:05:49 +04:00
|
|
|
struct ifnet *ifp;
|
|
|
|
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
|
|
|
|
memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
|
2006-05-18 13:05:49 +04:00
|
|
|
eh.ether_type = etype;
|
|
|
|
|
|
|
|
if (m->m_flags & (M_BCAST|M_MCAST)) {
|
|
|
|
struct carp_softc *vh;
|
|
|
|
struct mbuf *m0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX Should really check the list of multicast addresses
|
|
|
|
* for each CARP interface _before_ copying.
|
|
|
|
*/
|
|
|
|
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
|
|
|
|
m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
|
|
|
|
if (m0 == NULL)
|
|
|
|
continue;
|
2016-06-10 16:27:10 +03:00
|
|
|
m_set_rcvif(m0, &vh->sc_if);
|
2006-05-18 13:05:49 +04:00
|
|
|
ether_input(&vh->sc_if, m0);
|
|
|
|
}
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2016-06-10 16:31:43 +03:00
|
|
|
ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (ifp == NULL) {
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
2016-06-10 16:27:10 +03:00
|
|
|
m_set_rcvif(m, ifp);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2018-06-26 09:47:57 +03:00
|
|
|
bpf_mtap(ifp, m, BPF_D_IN);
|
2006-05-18 13:05:49 +04:00
|
|
|
ifp->if_ipackets++;
|
|
|
|
ether_input(ifp, m);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_master_down(void *v)
|
|
|
|
{
|
|
|
|
struct carp_softc *sc = v;
|
|
|
|
|
|
|
|
switch (sc->sc_state) {
|
|
|
|
case INIT:
|
|
|
|
printf("%s: master_down event in INIT state\n",
|
|
|
|
sc->sc_if.if_xname);
|
|
|
|
break;
|
|
|
|
case MASTER:
|
|
|
|
break;
|
|
|
|
case BACKUP:
|
2009-06-07 10:11:18 +04:00
|
|
|
CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_state(sc, MASTER);
|
|
|
|
carp_send_ad(sc);
|
|
|
|
carp_send_arp(sc);
|
|
|
|
#ifdef INET6
|
|
|
|
carp_send_na(sc);
|
|
|
|
#endif /* INET6 */
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
carp_setroute(sc, RTM_ADD);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When in backup state, af indicates whether to reset the master down timer
|
|
|
|
* for v4 or v6. If it's set to zero, reset the ones which are already pending.
|
|
|
|
*/
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_setrun(struct carp_softc *sc, sa_family_t af)
|
|
|
|
{
|
|
|
|
struct timeval tv;
|
|
|
|
|
|
|
|
if (sc->sc_carpdev == NULL) {
|
|
|
|
sc->sc_if.if_flags &= ~IFF_RUNNING;
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
|
|
|
|
(sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
|
|
|
|
sc->sc_if.if_flags |= IFF_RUNNING;
|
|
|
|
} else {
|
|
|
|
sc->sc_if.if_flags &= ~IFF_RUNNING;
|
|
|
|
carp_setroute(sc, RTM_DELETE);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (sc->sc_state) {
|
|
|
|
case INIT:
|
|
|
|
carp_set_state(sc, BACKUP);
|
|
|
|
carp_setroute(sc, RTM_DELETE);
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
break;
|
|
|
|
case BACKUP:
|
|
|
|
callout_stop(&sc->sc_ad_tmo);
|
|
|
|
tv.tv_sec = 3 * sc->sc_advbase;
|
|
|
|
tv.tv_usec = sc->sc_advskew * 1000000 / 256;
|
|
|
|
switch (af) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
2017-02-27 11:26:53 +03:00
|
|
|
#ifdef INET6
|
2006-05-18 13:05:49 +04:00
|
|
|
case AF_INET6:
|
|
|
|
callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
if (sc->sc_naddrs)
|
|
|
|
callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
|
2017-02-27 11:26:53 +03:00
|
|
|
#ifdef INET6
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_naddrs6)
|
|
|
|
callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
|
2016-07-23 15:19:07 +03:00
|
|
|
#endif /* INET6 */
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case MASTER:
|
|
|
|
tv.tv_sec = sc->sc_advbase;
|
|
|
|
tv.tv_usec = sc->sc_advskew * 1000000 / 256;
|
|
|
|
callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_multicast_cleanup(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct ip_moptions *imo = &sc->sc_imo;
|
|
|
|
#ifdef INET6
|
|
|
|
struct ip6_moptions *im6o = &sc->sc_im6o;
|
|
|
|
#endif
|
|
|
|
u_int16_t n = imo->imo_num_memberships;
|
|
|
|
|
|
|
|
/* Clean up our own multicast memberships */
|
|
|
|
while (n-- > 0) {
|
|
|
|
if (imo->imo_membership[n] != NULL) {
|
|
|
|
in_delmulti(imo->imo_membership[n]);
|
|
|
|
imo->imo_membership[n] = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
imo->imo_num_memberships = 0;
|
2016-06-21 06:28:27 +03:00
|
|
|
imo->imo_multicast_if_index = 0;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
while (!LIST_EMPTY(&im6o->im6o_memberships)) {
|
|
|
|
struct in6_multi_mship *imm =
|
|
|
|
LIST_FIRST(&im6o->im6o_memberships);
|
|
|
|
|
|
|
|
LIST_REMOVE(imm, i6mm_chain);
|
|
|
|
in6_leavegroup(imm);
|
|
|
|
}
|
2016-06-21 06:28:27 +03:00
|
|
|
im6o->im6o_multicast_if_index = 0;
|
2006-05-18 13:05:49 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/* And any other multicast memberships */
|
|
|
|
carp_ether_purgemulti(sc);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
|
|
|
|
{
|
|
|
|
struct carp_if *cif, *ncif = NULL;
|
|
|
|
struct carp_softc *vr, *after = NULL;
|
|
|
|
int myself = 0, error = 0;
|
|
|
|
int s;
|
|
|
|
|
|
|
|
if (ifp == sc->sc_carpdev)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (ifp != NULL) {
|
|
|
|
if ((ifp->if_flags & IFF_MULTICAST) == 0)
|
|
|
|
return (EADDRNOTAVAIL);
|
|
|
|
|
|
|
|
if (ifp->if_type == IFT_CARP)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
if (ifp->if_carp == NULL) {
|
2008-12-17 23:51:31 +03:00
|
|
|
ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (ncif == NULL)
|
|
|
|
return (ENOBUFS);
|
|
|
|
if ((error = ifpromisc(ifp, 1))) {
|
2008-12-17 23:51:31 +03:00
|
|
|
free(ncif, M_IFADDR);
|
2006-05-18 13:05:49 +04:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
ncif->vhif_ifp = ifp;
|
|
|
|
TAILQ_INIT(&ncif->vhif_vrs);
|
|
|
|
} else {
|
|
|
|
cif = (struct carp_if *)ifp->if_carp;
|
|
|
|
TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
|
|
|
|
if (vr != sc && vr->sc_vhid == sc->sc_vhid)
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* detach from old interface */
|
|
|
|
if (sc->sc_carpdev != NULL)
|
|
|
|
carpdetach(sc);
|
|
|
|
|
|
|
|
/* join multicast groups */
|
|
|
|
if (sc->sc_naddrs < 0 &&
|
|
|
|
(error = carp_join_multicast(sc)) != 0) {
|
|
|
|
if (ncif != NULL)
|
2008-12-17 23:51:31 +03:00
|
|
|
free(ncif, M_IFADDR);
|
2006-05-18 13:05:49 +04:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
if (sc->sc_naddrs6 < 0 &&
|
|
|
|
(error = carp_join_multicast6(sc)) != 0) {
|
|
|
|
if (ncif != NULL)
|
2008-12-17 23:51:31 +03:00
|
|
|
free(ncif, M_IFADDR);
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_multicast_cleanup(sc);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* attach carp interface to physical interface */
|
|
|
|
if (ncif != NULL)
|
2007-03-04 08:59:00 +03:00
|
|
|
ifp->if_carp = (void *)ncif;
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_carpdev = ifp;
|
2012-08-20 18:14:32 +04:00
|
|
|
sc->sc_if.if_capabilities = ifp->if_capabilities &
|
|
|
|
(IFCAP_TSOv4 | IFCAP_TSOv6 |
|
|
|
|
IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
|
|
|
|
IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
|
|
|
|
IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
|
|
|
|
IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
|
|
|
|
IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
cif = (struct carp_if *)ifp->if_carp;
|
|
|
|
TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
|
|
|
|
if (vr == sc)
|
|
|
|
myself = 1;
|
|
|
|
if (vr->sc_vhid < sc->sc_vhid)
|
|
|
|
after = vr;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!myself) {
|
|
|
|
/* We're trying to keep things in order */
|
|
|
|
if (after == NULL) {
|
|
|
|
TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
|
|
|
|
} else {
|
|
|
|
TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
|
|
|
|
sc, sc_list);
|
|
|
|
}
|
|
|
|
cif->vhif_nvrs++;
|
|
|
|
}
|
|
|
|
if (sc->sc_naddrs || sc->sc_naddrs6)
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
carp_set_enaddr(sc);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_LOCK(1, NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
s = splnet();
|
|
|
|
/* XXX linkstatehooks establish */
|
|
|
|
carp_carpdev_state(ifp);
|
|
|
|
splx(s);
|
2014-05-13 23:36:16 +04:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
} else {
|
|
|
|
carpdetach(sc);
|
|
|
|
sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_enaddr(struct carp_softc *sc)
|
|
|
|
{
|
Use malloc(9) for sockaddrs instead of pool(9), and remove dom_sa_pool
and dom_sa_len members from struct domain. Pools of fixed-size
objects are too rigid for sockaddr_dls, whose size can vary over
a wide range.
Return sockaddr_dl to its "historical" size. Now that I'm using
malloc(9) instead of pool(9) to allocate sockaddr_dl, I can create
a sockaddr_dl of any size in the kernel, so expanding sockaddr_dl
is useless.
Avoid using sizeof(struct sockaddr_dl) in the kernel.
Introduce sockaddr_dl_alloc() for allocating & initializing an
arbitrary sockaddr_dl on the heap.
Add an argument, the sockaddr length, to sockaddr_alloc(),
sockaddr_copy(), and sockaddr_dl_setaddr().
Constify: LLADDR() -> CLLADDR().
Where the kernel overwrites LLADDR(), use sockaddr_dl_setaddr(),
instead. Used properly, sockaddr_dl_setaddr() will not overrun
the end of the sockaddr.
2007-08-30 06:17:34 +04:00
|
|
|
uint8_t enaddr[ETHER_ADDR_LEN];
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
|
Use malloc(9) for sockaddrs instead of pool(9), and remove dom_sa_pool
and dom_sa_len members from struct domain. Pools of fixed-size
objects are too rigid for sockaddr_dls, whose size can vary over
a wide range.
Return sockaddr_dl to its "historical" size. Now that I'm using
malloc(9) instead of pool(9) to allocate sockaddr_dl, I can create
a sockaddr_dl of any size in the kernel, so expanding sockaddr_dl
is useless.
Avoid using sizeof(struct sockaddr_dl) in the kernel.
Introduce sockaddr_dl_alloc() for allocating & initializing an
arbitrary sockaddr_dl on the heap.
Add an argument, the sockaddr length, to sockaddr_alloc(),
sockaddr_copy(), and sockaddr_dl_setaddr().
Constify: LLADDR() -> CLLADDR().
Where the kernel overwrites LLADDR(), use sockaddr_dl_setaddr(),
instead. Used properly, sockaddr_dl_setaddr() will not overrun
the end of the sockaddr.
2007-08-30 06:17:34 +04:00
|
|
|
enaddr[0] = 3;
|
|
|
|
enaddr[1] = 0;
|
|
|
|
enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
|
|
|
|
enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
|
|
|
|
enaddr[4] = 0;
|
|
|
|
enaddr[5] = 0;
|
2006-05-18 13:05:49 +04:00
|
|
|
} else {
|
Use malloc(9) for sockaddrs instead of pool(9), and remove dom_sa_pool
and dom_sa_len members from struct domain. Pools of fixed-size
objects are too rigid for sockaddr_dls, whose size can vary over
a wide range.
Return sockaddr_dl to its "historical" size. Now that I'm using
malloc(9) instead of pool(9) to allocate sockaddr_dl, I can create
a sockaddr_dl of any size in the kernel, so expanding sockaddr_dl
is useless.
Avoid using sizeof(struct sockaddr_dl) in the kernel.
Introduce sockaddr_dl_alloc() for allocating & initializing an
arbitrary sockaddr_dl on the heap.
Add an argument, the sockaddr length, to sockaddr_alloc(),
sockaddr_copy(), and sockaddr_dl_setaddr().
Constify: LLADDR() -> CLLADDR().
Where the kernel overwrites LLADDR(), use sockaddr_dl_setaddr(),
instead. Used properly, sockaddr_dl_setaddr() will not overrun
the end of the sockaddr.
2007-08-30 06:17:34 +04:00
|
|
|
enaddr[0] = 0;
|
|
|
|
enaddr[1] = 0;
|
|
|
|
enaddr[2] = 0x5e;
|
|
|
|
enaddr[3] = 0;
|
|
|
|
enaddr[4] = 1;
|
|
|
|
enaddr[5] = sc->sc_vhid;
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
*** Summary ***
When a link-layer address changes (e.g., ifconfig ex0 link
02:de:ad:be:ef:02 active), send a gratuitous ARP and/or a Neighbor
Advertisement to update the network-/link-layer address bindings
on our LAN peers.
Refuse a change of ethernet address to the address 00:00:00:00:00:00
or to any multicast/broadcast address. (Thanks matt@.)
Reorder ifnet ioctl operations so that driver ioctls may inherit
the functions of their "class"---ether_ioctl(), fddi_ioctl(), et
cetera---and the class ioctls may inherit from the generic ioctl,
ifioctl_common(), but both driver- and class-ioctls may override
the generic behavior. Make network drivers share more code.
Distinguish a "factory" link-layer address from others for the
purposes of both protecting that address from deletion and computing
EUI64.
Return consistent, appropriate error codes from network drivers.
Improve readability. KNF.
*** Details ***
In if_attach(), always initialize the interface ioctl routine,
ifnet->if_ioctl, if the driver has not already initialized it.
Delete if_ioctl == NULL tests everywhere else, because it cannot
happen.
In the ioctl routines of network interfaces, inherit common ioctl
behaviors by calling either ifioctl_common() or whichever ioctl
routine is appropriate for the class of interface---e.g., ether_ioctl()
for ethernets.
Stop (ab)using SIOCSIFADDR and start to use SIOCINITIFADDR. In
the user->kernel interface, SIOCSIFADDR's argument was an ifreq,
but on the protocol->ifnet interface, SIOCSIFADDR's argument was
an ifaddr. That was confusing, and it would work against me as I
make it possible for a network interface to overload most ioctls.
On the protocol->ifnet interface, replace SIOCSIFADDR with
SIOCINITIFADDR. In ifioctl(), return EPERM if userland tries to
invoke SIOCINITIFADDR.
In ifioctl(), give the interface the first shot at handling most
interface ioctls, and give the protocol the second shot, instead
of the other way around. Finally, let compatibility code (COMPAT_OSOCK)
take a shot.
Pull device initialization out of switch statements under
SIOCINITIFADDR. For example, pull ..._init() out of any switch
statement that looks like this:
switch (...->sa_family) {
case ...:
..._init();
...
break;
...
default:
..._init();
...
break;
}
Rewrite many if-else clauses that handle all permutations of IFF_UP
and IFF_RUNNING to use a switch statement,
switch (x & (IFF_UP|IFF_RUNNING)) {
case 0:
...
break;
case IFF_RUNNING:
...
break;
case IFF_UP:
...
break;
case IFF_UP|IFF_RUNNING:
...
break;
}
unifdef lots of code containing #ifdef FreeBSD, #ifdef NetBSD, and
#ifdef SIOCSIFMTU, especially in fwip(4) and in ndis(4).
In ipw(4), remove an if_set_sadl() call that is out of place.
In nfe(4), reuse the jumbo MTU logic in ether_ioctl().
Let ethernets register a callback for setting h/w state such as
promiscuous mode and the multicast filter in accord with a change
in the if_flags: ether_set_ifflags_cb() registers a callback that
returns ENETRESET if the caller should reset the ethernet by calling
if_init(), 0 on success, != 0 on failure. Pull common code from
ex(4), gem(4), nfe(4), sip(4), tlp(4), vge(4) into ether_ioctl(),
and register if_flags callbacks for those drivers.
Return ENOTTY instead of EINVAL for inappropriate ioctls. In
zyd(4), use ENXIO instead of ENOTTY to indicate that the device is
not any longer attached.
Add to if_set_sadl() a boolean 'factory' argument that indicates
whether a link-layer address was assigned by the factory or some
other source. In a comment, recommend using the factory address
for generating an EUI64, and update in6_get_hw_ifid() to prefer a
factory address to any other link-layer address.
Add a routing message, RTM_LLINFO_UPD, that tells protocols to
update the binding of network-layer addresses to link-layer addresses.
Implement this message in IPv4 and IPv6 by sending a gratuitous
ARP or a neighbor advertisement, respectively. Generate RTM_LLINFO_UPD
messages on a change of an interface's link-layer address.
In ether_ioctl(), do not let SIOCALIFADDR set a link-layer address
that is broadcast/multicast or equal to 00:00:00:00:00:00.
Make ether_ioctl() call ifioctl_common() to handle ioctls that it
does not understand.
In gif(4), initialize if_softc and use it, instead of assuming that
the gif_softc and ifp overlap.
Let ifioctl_common() handle SIOCGIFADDR.
Sprinkle rtcache_invariants(), which checks on DIAGNOSTIC kernels
that certain invariants on a struct route are satisfied.
In agr(4), rewrite agr_ioctl_filter() to be a bit more explicit
about the ioctls that we do not allow on an agr(4) member interface.
bzero -> memset. Delete unnecessary casts to void *. Use
sockaddr_in_init() and sockaddr_in6_init(). Compare pointers with
NULL instead of "testing truth". Replace some instances of (type
*)0 with NULL. Change some K&R prototypes to ANSI C, and join
lines.
2008-11-07 03:20:01 +03:00
|
|
|
if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
#if 0
/*
 * Address-change callback (currently compiled out).
 *
 * Recounts the IPv4 and IPv6 addresses configured on the carp interface.
 * If either count dropped, the stored counts are refreshed, the IPv4
 * CARP group membership is re-established when it has gone missing, and
 * the interface is either put back into INIT (no addresses left) or its
 * HMAC is recomputed.  carp_setrun() is called in every case.
 */
static void
carp_addr_updated(void *v)
{
	struct carp_softc *sc = v;
	struct ifaddr *ifa;
	int count4 = 0, count6 = 0;

	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			count4++;
			break;
		case AF_INET6:
			count6++;
			break;
		default:
			break;
		}
	}

	/* Handle a callback after SIOCDIFADDR */
	if (count4 < sc->sc_naddrs || count6 < sc->sc_naddrs6) {
		struct in_addr group;

		sc->sc_naddrs = count4;
		sc->sc_naddrs6 = count6;

		/* Re-establish multicast membership removed by in_control */
		group.s_addr = INADDR_CARP_GROUP;
		if (!in_multi_group(group, &sc->sc_if, 0)) {
			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
				carp_join_multicast(sc);
		}

		if (sc->sc_naddrs != 0 || sc->sc_naddrs6 != 0) {
			carp_hmac_prepare(sc);
		} else {
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
		}
	}

	carp_setrun(sc, 0);
}
#endif
|
2006-05-18 13:05:49 +04:00
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
|
|
|
|
{
|
|
|
|
struct ifnet *ifp = sc->sc_carpdev;
|
|
|
|
struct in_ifaddr *ia, *ia_if;
|
|
|
|
int error = 0;
|
2016-08-01 06:15:30 +03:00
|
|
|
int s;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (sin->sin_addr.s_addr == 0) {
|
|
|
|
if (!(sc->sc_if.if_flags & IFF_UP))
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
if (sc->sc_naddrs)
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we have to do this by hand to ensure we don't match on ourselves */
|
|
|
|
ia_if = NULL;
|
2016-08-01 06:15:30 +03:00
|
|
|
s = pserialize_read_enter();
|
2016-07-06 11:42:34 +03:00
|
|
|
IN_ADDRLIST_READER_FOREACH(ia) {
|
2006-05-18 13:05:49 +04:00
|
|
|
/* and, yeah, we need a multicast-capable iface too */
|
|
|
|
if (ia->ia_ifp != &sc->sc_if &&
|
|
|
|
ia->ia_ifp->if_type != IFT_CARP &&
|
|
|
|
(ia->ia_ifp->if_flags & IFF_MULTICAST) &&
|
|
|
|
(sin->sin_addr.s_addr & ia->ia_subnetmask) ==
|
|
|
|
ia->ia_subnet) {
|
|
|
|
if (!ia_if)
|
|
|
|
ia_if = ia;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ia_if) {
|
|
|
|
ia = ia_if;
|
|
|
|
if (ifp) {
|
|
|
|
if (ifp != ia->ia_ifp)
|
|
|
|
return (EADDRNOTAVAIL);
|
|
|
|
} else {
|
2016-08-01 06:15:30 +03:00
|
|
|
/* FIXME NOMPSAFE */
|
2006-05-18 13:05:49 +04:00
|
|
|
ifp = ia->ia_ifp;
|
|
|
|
}
|
|
|
|
}
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if ((error = carp_set_ifp(sc, ifp)))
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (sc->sc_carpdev == NULL)
|
|
|
|
return (EADDRNOTAVAIL);
|
|
|
|
|
|
|
|
if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
sc->sc_naddrs++;
|
|
|
|
if (sc->sc_carpdev != NULL)
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Hook if_addrhooks so that we get a callback after in_ifinit has run,
|
|
|
|
* to correct any inappropriate routes that it inserted.
|
|
|
|
*/
|
|
|
|
if (sc->ah_cookie == 0) {
|
|
|
|
/* XXX link address hook */
|
|
|
|
}
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_join_multicast(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct ip_moptions *imo = &sc->sc_imo, tmpimo;
|
|
|
|
struct in_addr addr;
|
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&tmpimo, 0, sizeof(tmpimo));
|
2006-05-18 13:05:49 +04:00
|
|
|
addr.s_addr = INADDR_CARP_GROUP;
|
|
|
|
if ((tmpimo.imo_membership[0] =
|
|
|
|
in_addmulti(&addr, &sc->sc_if)) == NULL) {
|
|
|
|
return (ENOBUFS);
|
|
|
|
}
|
|
|
|
|
|
|
|
imo->imo_membership[0] = tmpimo.imo_membership[0];
|
|
|
|
imo->imo_num_memberships = 1;
|
2016-06-21 06:28:27 +03:00
|
|
|
imo->imo_multicast_if_index = sc->sc_if.if_index;
|
2006-05-18 13:05:49 +04:00
|
|
|
imo->imo_multicast_ttl = CARP_DFLTTL;
|
|
|
|
imo->imo_multicast_loop = 0;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef INET6
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
|
|
|
|
{
|
|
|
|
struct ifnet *ifp = sc->sc_carpdev;
|
|
|
|
struct in6_ifaddr *ia, *ia_if;
|
|
|
|
int error = 0;
|
2016-08-01 06:15:30 +03:00
|
|
|
int s;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
|
|
|
|
if (!(sc->sc_if.if_flags & IFF_UP))
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
if (sc->sc_naddrs6)
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we have to do this by hand to ensure we don't match on ourselves */
|
|
|
|
ia_if = NULL;
|
2016-08-01 06:15:30 +03:00
|
|
|
s = pserialize_read_enter();
|
2016-07-04 09:48:14 +03:00
|
|
|
IN6_ADDRLIST_READER_FOREACH(ia) {
|
2006-05-18 13:05:49 +04:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
if ((sin6->sin6_addr.s6_addr32[i] &
|
|
|
|
ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
|
|
|
|
(ia->ia_addr.sin6_addr.s6_addr32[i] &
|
|
|
|
ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* and, yeah, we need a multicast-capable iface too */
|
|
|
|
if (ia->ia_ifp != &sc->sc_if &&
|
|
|
|
ia->ia_ifp->if_type != IFT_CARP &&
|
|
|
|
(ia->ia_ifp->if_flags & IFF_MULTICAST) &&
|
|
|
|
(i == 4)) {
|
|
|
|
if (!ia_if)
|
|
|
|
ia_if = ia;
|
|
|
|
}
|
|
|
|
}
|
2016-08-01 06:15:30 +03:00
|
|
|
pserialize_read_exit(s);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (ia_if) {
|
|
|
|
ia = ia_if;
|
|
|
|
if (sc->sc_carpdev) {
|
|
|
|
if (sc->sc_carpdev != ia->ia_ifp)
|
|
|
|
return (EADDRNOTAVAIL);
|
|
|
|
} else {
|
|
|
|
ifp = ia->ia_ifp;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((error = carp_set_ifp(sc, ifp)))
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (sc->sc_carpdev == NULL)
|
|
|
|
return (EADDRNOTAVAIL);
|
|
|
|
|
|
|
|
if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
sc->sc_naddrs6++;
|
|
|
|
if (sc->sc_carpdev != NULL)
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_join_multicast6(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct in6_multi_mship *imm, *imm2;
|
|
|
|
struct ip6_moptions *im6o = &sc->sc_im6o;
|
|
|
|
struct sockaddr_in6 addr6;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
/* Join IPv6 CARP multicast group */
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&addr6, 0, sizeof(addr6));
|
2006-05-18 13:05:49 +04:00
|
|
|
addr6.sin6_family = AF_INET6;
|
|
|
|
addr6.sin6_len = sizeof(addr6);
|
|
|
|
addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
|
|
|
|
addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
|
|
|
|
addr6.sin6_addr.s6_addr8[15] = 0x12;
|
|
|
|
if ((imm = in6_joingroup(&sc->sc_if,
|
|
|
|
&addr6.sin6_addr, &error, 0)) == NULL) {
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
/* join solicited multicast address */
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
|
2006-05-18 13:05:49 +04:00
|
|
|
addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
|
|
|
|
addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
|
|
|
|
addr6.sin6_addr.s6_addr32[1] = 0;
|
|
|
|
addr6.sin6_addr.s6_addr32[2] = htonl(1);
|
|
|
|
addr6.sin6_addr.s6_addr32[3] = 0;
|
|
|
|
addr6.sin6_addr.s6_addr8[12] = 0xff;
|
|
|
|
if ((imm2 = in6_joingroup(&sc->sc_if,
|
|
|
|
&addr6.sin6_addr, &error, 0)) == NULL) {
|
|
|
|
in6_leavegroup(imm);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* apply v6 multicast membership */
|
2016-06-21 06:28:27 +03:00
|
|
|
im6o->im6o_multicast_if_index = sc->sc_if.if_index;
|
2006-05-18 13:05:49 +04:00
|
|
|
if (imm)
|
|
|
|
LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
|
|
|
|
i6mm_chain);
|
|
|
|
if (imm2)
|
|
|
|
LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
|
|
|
|
i6mm_chain);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* INET6 */
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
*** Summary ***
When a link-layer address changes (e.g., ifconfig ex0 link
02:de:ad:be:ef:02 active), send a gratuitous ARP and/or a Neighbor
Advertisement to update the network-/link-layer address bindings
on our LAN peers.
Refuse a change of ethernet address to the address 00:00:00:00:00:00
or to any multicast/broadcast address. (Thanks matt@.)
Reorder ifnet ioctl operations so that driver ioctls may inherit
the functions of their "class"---ether_ioctl(), fddi_ioctl(), et
cetera---and the class ioctls may inherit from the generic ioctl,
ifioctl_common(), but both driver- and class-ioctls may override
the generic behavior. Make network drivers share more code.
Distinguish a "factory" link-layer address from others for the
purposes of both protecting that address from deletion and computing
EUI64.
Return consistent, appropriate error codes from network drivers.
Improve readability. KNF.
*** Details ***
In if_attach(), always initialize the interface ioctl routine,
ifnet->if_ioctl, if the driver has not already initialized it.
Delete if_ioctl == NULL tests everywhere else, because it cannot
happen.
In the ioctl routines of network interfaces, inherit common ioctl
behaviors by calling either ifioctl_common() or whichever ioctl
routine is appropriate for the class of interface---e.g., ether_ioctl()
for ethernets.
Stop (ab)using SIOCSIFADDR and start to use SIOCINITIFADDR. In
the user->kernel interface, SIOCSIFADDR's argument was an ifreq,
but on the protocol->ifnet interface, SIOCSIFADDR's argument was
an ifaddr. That was confusing, and it would work against me as I
make it possible for a network interface to overload most ioctls.
On the protocol->ifnet interface, replace SIOCSIFADDR with
SIOCINITIFADDR. In ifioctl(), return EPERM if userland tries to
invoke SIOCINITIFADDR.
In ifioctl(), give the interface the first shot at handling most
interface ioctls, and give the protocol the second shot, instead
of the other way around. Finally, let compatibility code (COMPAT_OSOCK)
take a shot.
Pull device initialization out of switch statements under
SIOCINITIFADDR. For example, pull ..._init() out of any switch
statement that looks like this:
switch (...->sa_family) {
case ...:
..._init();
...
break;
...
default:
..._init();
...
break;
}
Rewrite many if-else clauses that handle all permutations of IFF_UP
and IFF_RUNNING to use a switch statement,
switch (x & (IFF_UP|IFF_RUNNING)) {
case 0:
...
break;
case IFF_RUNNING:
...
break;
case IFF_UP:
...
break;
case IFF_UP|IFF_RUNNING:
...
break;
}
unifdef lots of code containing #ifdef FreeBSD, #ifdef NetBSD, and
#ifdef SIOCSIFMTU, especially in fwip(4) and in ndis(4).
In ipw(4), remove an if_set_sadl() call that is out of place.
In nfe(4), reuse the jumbo MTU logic in ether_ioctl().
Let ethernets register a callback for setting h/w state such as
promiscuous mode and the multicast filter in accord with a change
in the if_flags: ether_set_ifflags_cb() registers a callback that
returns ENETRESET if the caller should reset the ethernet by calling
if_init(), 0 on success, != 0 on failure. Pull common code from
ex(4), gem(4), nfe(4), sip(4), tlp(4), vge(4) into ether_ioctl(),
and register if_flags callbacks for those drivers.
Return ENOTTY instead of EINVAL for inappropriate ioctls. In
zyd(4), use ENXIO instead of ENOTTY to indicate that the device is
not any longer attached.
Add to if_set_sadl() a boolean 'factory' argument that indicates
whether a link-layer address was assigned by the factory or some
other source. In a comment, recommend using the factory address
for generating an EUI64, and update in6_get_hw_ifid() to prefer a
factory address to any other link-layer address.
Add a routing message, RTM_LLINFO_UPD, that tells protocols to
update the binding of network-layer addresses to link-layer addresses.
Implement this message in IPv4 and IPv6 by sending a gratuitous
ARP or a neighbor advertisement, respectively. Generate RTM_LLINFO_UPD
messages on a change of an interface's link-layer address.
In ether_ioctl(), do not let SIOCALIFADDR set a link-layer address
that is broadcast/multicast or equal to 00:00:00:00:00:00.
Make ether_ioctl() call ifioctl_common() to handle ioctls that it
does not understand.
In gif(4), initialize if_softc and use it, instead of assuming that
the gif_softc and ifp overlap.
Let ifioctl_common() handle SIOCGIFADDR.
Sprinkle rtcache_invariants(), which checks on DIAGNOSTIC kernels
that certain invariants on a struct route are satisfied.
In agr(4), rewrite agr_ioctl_filter() to be a bit more explicit
about the ioctls that we do not allow on an agr(4) member interface.
bzero -> memset. Delete unnecessary casts to void *. Use
sockaddr_in_init() and sockaddr_in6_init(). Compare pointers with
NULL instead of "testing truth". Replace some instances of (type
*)0 with NULL. Change some K&R prototypes to ANSI C, and join
lines.
2008-11-07 03:20:01 +03:00
|
|
|
carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
2006-07-24 02:06:03 +04:00
|
|
|
struct lwp *l = curlwp; /* XXX */
|
2006-05-18 13:05:49 +04:00
|
|
|
struct carp_softc *sc = ifp->if_softc, *vr;
|
|
|
|
struct carpreq carpr;
|
|
|
|
struct ifaddr *ifa;
|
|
|
|
struct ifreq *ifr;
|
|
|
|
struct ifnet *cdev = NULL;
|
|
|
|
int error = 0;
|
|
|
|
|
*** Summary ***
When a link-layer address changes (e.g., ifconfig ex0 link
02:de:ad:be:ef:02 active), send a gratuitous ARP and/or a Neighbor
Advertisement to update the network-/link-layer address bindings
on our LAN peers.
Refuse a change of ethernet address to the address 00:00:00:00:00:00
or to any multicast/broadcast address. (Thanks matt@.)
Reorder ifnet ioctl operations so that driver ioctls may inherit
the functions of their "class"---ether_ioctl(), fddi_ioctl(), et
cetera---and the class ioctls may inherit from the generic ioctl,
ifioctl_common(), but both driver- and class-ioctls may override
the generic behavior. Make network drivers share more code.
Distinguish a "factory" link-layer address from others for the
purposes of both protecting that address from deletion and computing
EUI64.
Return consistent, appropriate error codes from network drivers.
Improve readability. KNF.
*** Details ***
In if_attach(), always initialize the interface ioctl routine,
ifnet->if_ioctl, if the driver has not already initialized it.
Delete if_ioctl == NULL tests everywhere else, because it cannot
happen.
In the ioctl routines of network interfaces, inherit common ioctl
behaviors by calling either ifioctl_common() or whichever ioctl
routine is appropriate for the class of interface---e.g., ether_ioctl()
for ethernets.
Stop (ab)using SIOCSIFADDR and start to use SIOCINITIFADDR. In
the user->kernel interface, SIOCSIFADDR's argument was an ifreq,
but on the protocol->ifnet interface, SIOCSIFADDR's argument was
an ifaddr. That was confusing, and it would work against me as I
make it possible for a network interface to overload most ioctls.
On the protocol->ifnet interface, replace SIOCSIFADDR with
SIOCINITIFADDR. In ifioctl(), return EPERM if userland tries to
invoke SIOCINITIFADDR.
In ifioctl(), give the interface the first shot at handling most
interface ioctls, and give the protocol the second shot, instead
of the other way around. Finally, let compatibility code (COMPAT_OSOCK)
take a shot.
Pull device initialization out of switch statements under
SIOCINITIFADDR. For example, pull ..._init() out of any switch
statement that looks like this:
switch (...->sa_family) {
case ...:
..._init();
...
break;
...
default:
..._init();
...
break;
}
Rewrite many if-else clauses that handle all permutations of IFF_UP
and IFF_RUNNING to use a switch statement,
switch (x & (IFF_UP|IFF_RUNNING)) {
case 0:
...
break;
case IFF_RUNNING:
...
break;
case IFF_UP:
...
break;
case IFF_UP|IFF_RUNNING:
...
break;
}
unifdef lots of code containing #ifdef FreeBSD, #ifdef NetBSD, and
#ifdef SIOCSIFMTU, especially in fwip(4) and in ndis(4).
In ipw(4), remove an if_set_sadl() call that is out of place.
In nfe(4), reuse the jumbo MTU logic in ether_ioctl().
Let ethernets register a callback for setting h/w state such as
promiscuous mode and the multicast filter in accord with a change
in the if_flags: ether_set_ifflags_cb() registers a callback that
returns ENETRESET if the caller should reset the ethernet by calling
if_init(), 0 on success, != 0 on failure. Pull common code from
ex(4), gem(4), nfe(4), sip(4), tlp(4), vge(4) into ether_ioctl(),
and register if_flags callbacks for those drivers.
Return ENOTTY instead of EINVAL for inappropriate ioctls. In
zyd(4), use ENXIO instead of ENOTTY to indicate that the device is
not any longer attached.
Add to if_set_sadl() a boolean 'factory' argument that indicates
whether a link-layer address was assigned by the factory or some
other source. In a comment, recommend using the factory address
for generating an EUI64, and update in6_get_hw_ifid() to prefer a
factory address to any other link-layer address.
Add a routing message, RTM_LLINFO_UPD, that tells protocols to
update the binding of network-layer addresses to link-layer addresses.
Implement this message in IPv4 and IPv6 by sending a gratuitous
ARP or a neighbor advertisement, respectively. Generate RTM_LLINFO_UPD
messages on a change of an interface's link-layer address.
In ether_ioctl(), do not let SIOCALIFADDR set a link-layer address
that is broadcast/multicast or equal to 00:00:00:00:00:00.
Make ether_ioctl() call ifioctl_common() to handle ioctls that it
does not understand.
In gif(4), initialize if_softc and use it, instead of assuming that
the gif_softc and ifp overlap.
Let ifioctl_common() handle SIOCGIFADDR.
Sprinkle rtcache_invariants(), which checks on DIAGNOSTIC kernels
that certain invariants on a struct route are satisfied.
In agr(4), rewrite agr_ioctl_filter() to be a bit more explicit
about the ioctls that we do not allow on an agr(4) member interface.
bzero -> memset. Delete unnecessary casts to void *. Use
sockaddr_in_init() and sockaddr_in6_init(). Compare pointers with
NULL instead of "testing truth". Replace some instances of (type
*)0 with NULL. Change some K&R prototypes to ANSI C, and join
lines.
2008-11-07 03:20:01 +03:00
|
|
|
ifa = (struct ifaddr *)data;
|
|
|
|
ifr = (struct ifreq *)data;
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
switch (cmd) {
|
*** Summary ***
When a link-layer address changes (e.g., ifconfig ex0 link
02:de:ad:be:ef:02 active), send a gratuitous ARP and/or a Neighbor
Advertisement to update the network-/link-layer address bindings
on our LAN peers.
Refuse a change of ethernet address to the address 00:00:00:00:00:00
or to any multicast/broadcast address. (Thanks matt@.)
Reorder ifnet ioctl operations so that driver ioctls may inherit
the functions of their "class"---ether_ioctl(), fddi_ioctl(), et
cetera---and the class ioctls may inherit from the generic ioctl,
ifioctl_common(), but both driver- and class-ioctls may override
the generic behavior. Make network drivers share more code.
Distinguish a "factory" link-layer address from others for the
purposes of both protecting that address from deletion and computing
EUI64.
Return consistent, appropriate error codes from network drivers.
Improve readability. KNF.
*** Details ***
In if_attach(), always initialize the interface ioctl routine,
ifnet->if_ioctl, if the driver has not already initialized it.
Delete if_ioctl == NULL tests everywhere else, because it cannot
happen.
In the ioctl routines of network interfaces, inherit common ioctl
behaviors by calling either ifioctl_common() or whichever ioctl
routine is appropriate for the class of interface---e.g., ether_ioctl()
for ethernets.
Stop (ab)using SIOCSIFADDR and start to use SIOCINITIFADDR. In
the user->kernel interface, SIOCSIFADDR's argument was an ifreq,
but on the protocol->ifnet interface, SIOCSIFADDR's argument was
an ifaddr. That was confusing, and it would work against me as I
make it possible for a network interface to overload most ioctls.
On the protocol->ifnet interface, replace SIOCSIFADDR with
SIOCINITIFADDR. In ifioctl(), return EPERM if userland tries to
invoke SIOCINITIFADDR.
In ifioctl(), give the interface the first shot at handling most
interface ioctls, and give the protocol the second shot, instead
of the other way around. Finally, let compatibility code (COMPAT_OSOCK)
take a shot.
Pull device initialization out of switch statements under
SIOCINITIFADDR. For example, pull ..._init() out of any switch
statement that looks like this:
switch (...->sa_family) {
case ...:
..._init();
...
break;
...
default:
..._init();
...
break;
}
Rewrite many if-else clauses that handle all permutations of IFF_UP
and IFF_RUNNING to use a switch statement,
switch (x & (IFF_UP|IFF_RUNNING)) {
case 0:
...
break;
case IFF_RUNNING:
...
break;
case IFF_UP:
...
break;
case IFF_UP|IFF_RUNNING:
...
break;
}
unifdef lots of code containing #ifdef FreeBSD, #ifdef NetBSD, and
#ifdef SIOCSIFMTU, especially in fwip(4) and in ndis(4).
In ipw(4), remove an if_set_sadl() call that is out of place.
In nfe(4), reuse the jumbo MTU logic in ether_ioctl().
Let ethernets register a callback for setting h/w state such as
promiscuous mode and the multicast filter in accord with a change
in the if_flags: ether_set_ifflags_cb() registers a callback that
returns ENETRESET if the caller should reset the ethernet by calling
if_init(), 0 on success, != 0 on failure. Pull common code from
ex(4), gem(4), nfe(4), sip(4), tlp(4), vge(4) into ether_ioctl(),
and register if_flags callbacks for those drivers.
Return ENOTTY instead of EINVAL for inappropriate ioctls. In
zyd(4), use ENXIO instead of ENOTTY to indicate that the device is
not any longer attached.
Add to if_set_sadl() a boolean 'factory' argument that indicates
whether a link-layer address was assigned by the factory or some
other source. In a comment, recommend using the factory address
for generating an EUI64, and update in6_get_hw_ifid() to prefer a
factory address to any other link-layer address.
Add a routing message, RTM_LLINFO_UPD, that tells protocols to
update the binding of network-layer addresses to link-layer addresses.
Implement this message in IPv4 and IPv6 by sending a gratuitous
ARP or a neighbor advertisement, respectively. Generate RTM_LLINFO_UPD
messages on a change of an interface's link-layer address.
In ether_ioctl(), do not let SIOCALIFADDR set a link-layer address
that is broadcast/multicast or equal to 00:00:00:00:00:00.
Make ether_ioctl() call ifioctl_common() to handle ioctls that it
does not understand.
In gif(4), initialize if_softc and use it, instead of assuming that
the gif_softc and ifp overlap.
Let ifioctl_common() handle SIOCGIFADDR.
Sprinkle rtcache_invariants(), which checks on DIAGNOSTIC kernels
that certain invariants on a struct route are satisfied.
In agr(4), rewrite agr_ioctl_filter() to be a bit more explicit
about the ioctls that we do not allow on an agr(4) member interface.
bzero -> memset. Delete unnecessary casts to void *. Use
sockaddr_in_init() and sockaddr_in6_init(). Compare pointers with
NULL instead of "testing truth". Replace some instances of (type
*)0 with NULL. Change some K&R prototypes to ANSI C, and join
lines.
2008-11-07 03:20:01 +03:00
|
|
|
case SIOCINITIFADDR:
|
2006-05-18 13:05:49 +04:00
|
|
|
switch (ifa->ifa_addr->sa_family) {
|
|
|
|
#ifdef INET
|
|
|
|
case AF_INET:
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
|
2006-05-18 13:05:49 +04:00
|
|
|
sizeof(struct sockaddr));
|
|
|
|
error = carp_set_addr(sc, satosin(ifa->ifa_addr));
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
sc->sc_if.if_flags|= IFF_UP;
|
|
|
|
error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SIOCSIFFLAGS:
|
*** Summary ***
When a link-layer address changes (e.g., ifconfig ex0 link
02:de:ad:be:ef:02 active), send a gratuitous ARP and/or a Neighbor
Advertisement to update the network-/link-layer address bindings
on our LAN peers.
Refuse a change of ethernet address to the address 00:00:00:00:00:00
or to any multicast/broadcast address. (Thanks matt@.)
Reorder ifnet ioctl operations so that driver ioctls may inherit
the functions of their "class"---ether_ioctl(), fddi_ioctl(), et
cetera---and the class ioctls may inherit from the generic ioctl,
ifioctl_common(), but both driver- and class-ioctls may override
the generic behavior. Make network drivers share more code.
Distinguish a "factory" link-layer address from others for the
purposes of both protecting that address from deletion and computing
EUI64.
Return consistent, appropriate error codes from network drivers.
Improve readability. KNF.
*** Details ***
In if_attach(), always initialize the interface ioctl routine,
ifnet->if_ioctl, if the driver has not already initialized it.
Delete if_ioctl == NULL tests everywhere else, because it cannot
happen.
In the ioctl routines of network interfaces, inherit common ioctl
behaviors by calling either ifioctl_common() or whichever ioctl
routine is appropriate for the class of interface---e.g., ether_ioctl()
for ethernets.
Stop (ab)using SIOCSIFADDR and start to use SIOCINITIFADDR. In
the user->kernel interface, SIOCSIFADDR's argument was an ifreq,
but on the protocol->ifnet interface, SIOCSIFADDR's argument was
an ifaddr. That was confusing, and it would work against me as I
make it possible for a network interface to overload most ioctls.
On the protocol->ifnet interface, replace SIOCSIFADDR with
SIOCINITIFADDR. In ifioctl(), return EPERM if userland tries to
invoke SIOCINITIFADDR.
In ifioctl(), give the interface the first shot at handling most
interface ioctls, and give the protocol the second shot, instead
of the other way around. Finally, let compatibility code (COMPAT_OSOCK)
take a shot.
Pull device initialization out of switch statements under
SIOCINITIFADDR. For example, pull ..._init() out of any switch
statement that looks like this:
switch (...->sa_family) {
case ...:
..._init();
...
break;
...
default:
..._init();
...
break;
}
Rewrite many if-else clauses that handle all permutations of IFF_UP
and IFF_RUNNING to use a switch statement,
switch (x & (IFF_UP|IFF_RUNNING)) {
case 0:
...
break;
case IFF_RUNNING:
...
break;
case IFF_UP:
...
break;
case IFF_UP|IFF_RUNNING:
...
break;
}
unifdef lots of code containing #ifdef FreeBSD, #ifdef NetBSD, and
#ifdef SIOCSIFMTU, especially in fwip(4) and in ndis(4).
In ipw(4), remove an if_set_sadl() call that is out of place.
In nfe(4), reuse the jumbo MTU logic in ether_ioctl().
Let ethernets register a callback for setting h/w state such as
promiscuous mode and the multicast filter in accord with a change
in the if_flags: ether_set_ifflags_cb() registers a callback that
returns ENETRESET if the caller should reset the ethernet by calling
if_init(), 0 on success, != 0 on failure. Pull common code from
ex(4), gem(4), nfe(4), sip(4), tlp(4), vge(4) into ether_ioctl(),
and register if_flags callbacks for those drivers.
Return ENOTTY instead of EINVAL for inappropriate ioctls. In
zyd(4), use ENXIO instead of ENOTTY to indicate that the device is
not any longer attached.
Add to if_set_sadl() a boolean 'factory' argument that indicates
whether a link-layer address was assigned by the factory or some
other source. In a comment, recommend using the factory address
for generating an EUI64, and update in6_get_hw_ifid() to prefer a
factory address to any other link-layer address.
Add a routing message, RTM_LLINFO_UPD, that tells protocols to
update the binding of network-layer addresses to link-layer addresses.
Implement this message in IPv4 and IPv6 by sending a gratuitous
ARP or a neighbor advertisement, respectively. Generate RTM_LLINFO_UPD
messages on a change of an interface's link-layer address.
In ether_ioctl(), do not let SIOCALIFADDR set a link-layer address
that is broadcast/multicast or equal to 00:00:00:00:00:00.
Make ether_ioctl() call ifioctl_common() to handle ioctls that it
does not understand.
In gif(4), initialize if_softc and use it, instead of assuming that
the gif_softc and ifp overlap.
Let ifioctl_common() handle SIOCGIFADDR.
Sprinkle rtcache_invariants(), which checks on DIAGNOSTIC kernels
that certain invariants on a struct route are satisfied.
In agr(4), rewrite agr_ioctl_filter() to be a bit more explicit
about the ioctls that we do not allow on an agr(4) member interface.
bzero -> memset. Delete unnecessary casts to void *. Use
sockaddr_in_init() and sockaddr_in6_init(). Compare pointers with
NULL instead of "testing truth". Replace some instances of (type
*)0 with NULL. Change some K&R prototypes to ANSI C, and join
lines.
2008-11-07 03:20:01 +03:00
|
|
|
if ((error = ifioctl_common(ifp, cmd, data)) != 0)
|
|
|
|
break;
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
|
|
|
|
callout_stop(&sc->sc_ad_tmo);
|
|
|
|
callout_stop(&sc->sc_md_tmo);
|
|
|
|
callout_stop(&sc->sc_md6_tmo);
|
|
|
|
if (sc->sc_state == MASTER) {
|
|
|
|
/* we need the interface up to bow out */
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
sc->sc_bow_out = 1;
|
|
|
|
carp_send_ad(sc);
|
|
|
|
}
|
|
|
|
sc->sc_if.if_flags &= ~IFF_UP;
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
|
|
|
|
sc->sc_if.if_flags |= IFF_UP;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SIOCSVH:
|
2006-10-25 22:11:22 +04:00
|
|
|
if (l == NULL)
|
|
|
|
break;
|
|
|
|
if ((error = kauth_authorize_network(l->l_cred,
|
|
|
|
KAUTH_NETWORK_INTERFACE,
|
2006-10-30 03:58:21 +03:00
|
|
|
KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
|
2006-10-25 22:11:22 +04:00
|
|
|
NULL)) != 0)
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
|
|
|
|
break;
|
|
|
|
error = 1;
|
|
|
|
if (carpr.carpr_carpdev[0] != '\0' &&
|
|
|
|
(cdev = ifunit(carpr.carpr_carpdev)) == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
if ((error = carp_set_ifp(sc, cdev)))
|
|
|
|
return (error);
|
|
|
|
if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
|
|
|
|
switch (carpr.carpr_state) {
|
|
|
|
case BACKUP:
|
|
|
|
callout_stop(&sc->sc_ad_tmo);
|
|
|
|
carp_set_state(sc, BACKUP);
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
carp_setroute(sc, RTM_DELETE);
|
|
|
|
break;
|
|
|
|
case MASTER:
|
|
|
|
carp_master_down(sc);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (carpr.carpr_vhid > 0) {
|
|
|
|
if (carpr.carpr_vhid > 255) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (sc->sc_carpdev) {
|
|
|
|
struct carp_if *cif;
|
|
|
|
cif = (struct carp_if *)sc->sc_carpdev->if_carp;
|
|
|
|
TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
|
|
|
|
if (vr != sc &&
|
|
|
|
vr->sc_vhid == carpr.carpr_vhid)
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
sc->sc_vhid = carpr.carpr_vhid;
|
|
|
|
carp_set_enaddr(sc);
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
error--;
|
|
|
|
}
|
|
|
|
if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
|
|
|
|
if (carpr.carpr_advskew > 254) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (carpr.carpr_advbase > 255) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
sc->sc_advbase = carpr.carpr_advbase;
|
|
|
|
sc->sc_advskew = carpr.carpr_advskew;
|
|
|
|
error--;
|
|
|
|
}
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
|
2006-05-18 13:05:49 +04:00
|
|
|
if (error > 0)
|
|
|
|
error = EINVAL;
|
|
|
|
else {
|
|
|
|
error = 0;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SIOCGVH:
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&carpr, 0, sizeof(carpr));
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_carpdev != NULL)
|
|
|
|
strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
|
|
|
|
IFNAMSIZ);
|
|
|
|
carpr.carpr_state = sc->sc_state;
|
|
|
|
carpr.carpr_vhid = sc->sc_vhid;
|
|
|
|
carpr.carpr_advbase = sc->sc_advbase;
|
|
|
|
carpr.carpr_advskew = sc->sc_advskew;
|
2006-05-25 19:22:05 +04:00
|
|
|
|
2009-05-13 02:01:20 +04:00
|
|
|
if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
|
2006-10-25 22:11:22 +04:00
|
|
|
KAUTH_NETWORK_INTERFACE,
|
2006-10-30 03:58:21 +03:00
|
|
|
KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
|
2009-05-13 01:48:42 +04:00
|
|
|
NULL)) == 0)
|
2009-04-18 18:58:02 +04:00
|
|
|
memcpy(carpr.carpr_key, sc->sc_key,
|
2006-05-18 13:05:49 +04:00
|
|
|
sizeof(carpr.carpr_key));
|
|
|
|
error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SIOCADDMULTI:
|
|
|
|
error = carp_ether_addmulti(sc, ifr);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case SIOCDELMULTI:
|
|
|
|
error = carp_ether_delmulti(sc, ifr);
|
|
|
|
break;
|
|
|
|
|
2012-08-20 18:14:32 +04:00
|
|
|
case SIOCSIFCAP:
|
2012-08-20 20:01:37 +04:00
|
|
|
if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
|
2012-08-20 18:14:32 +04:00
|
|
|
error = 0;
|
|
|
|
break;
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
default:
|
*** Summary ***
When a link-layer address changes (e.g., ifconfig ex0 link
02:de:ad:be:ef:02 active), send a gratuitous ARP and/or a Neighbor
Advertisement to update the network-/link-layer address bindings
on our LAN peers.
Refuse a change of ethernet address to the address 00:00:00:00:00:00
or to any multicast/broadcast address. (Thanks matt@.)
Reorder ifnet ioctl operations so that driver ioctls may inherit
the functions of their "class"---ether_ioctl(), fddi_ioctl(), et
cetera---and the class ioctls may inherit from the generic ioctl,
ifioctl_common(), but both driver- and class-ioctls may override
the generic behavior. Make network drivers share more code.
Distinguish a "factory" link-layer address from others for the
purposes of both protecting that address from deletion and computing
EUI64.
Return consistent, appropriate error codes from network drivers.
Improve readability. KNF.
*** Details ***
In if_attach(), always initialize the interface ioctl routine,
ifnet->if_ioctl, if the driver has not already initialized it.
Delete if_ioctl == NULL tests everywhere else, because it cannot
happen.
In the ioctl routines of network interfaces, inherit common ioctl
behaviors by calling either ifioctl_common() or whichever ioctl
routine is appropriate for the class of interface---e.g., ether_ioctl()
for ethernets.
Stop (ab)using SIOCSIFADDR and start to use SIOCINITIFADDR. In
the user->kernel interface, SIOCSIFADDR's argument was an ifreq,
but on the protocol->ifnet interface, SIOCSIFADDR's argument was
an ifaddr. That was confusing, and it would work against me as I
make it possible for a network interface to overload most ioctls.
On the protocol->ifnet interface, replace SIOCSIFADDR with
SIOCINITIFADDR. In ifioctl(), return EPERM if userland tries to
invoke SIOCINITIFADDR.
In ifioctl(), give the interface the first shot at handling most
interface ioctls, and give the protocol the second shot, instead
of the other way around. Finally, let compatibility code (COMPAT_OSOCK)
take a shot.
Pull device initialization out of switch statements under
SIOCINITIFADDR. For example, pull ..._init() out of any switch
statement that looks like this:
switch (...->sa_family) {
case ...:
..._init();
...
break;
...
default:
..._init();
...
break;
}
Rewrite many if-else clauses that handle all permutations of IFF_UP
and IFF_RUNNING to use a switch statement,
switch (x & (IFF_UP|IFF_RUNNING)) {
case 0:
...
break;
case IFF_RUNNING:
...
break;
case IFF_UP:
...
break;
case IFF_UP|IFF_RUNNING:
...
break;
}
unifdef lots of code containing #ifdef FreeBSD, #ifdef NetBSD, and
#ifdef SIOCSIFMTU, especially in fwip(4) and in ndis(4).
In ipw(4), remove an if_set_sadl() call that is out of place.
In nfe(4), reuse the jumbo MTU logic in ether_ioctl().
Let ethernets register a callback for setting h/w state such as
promiscuous mode and the multicast filter in accord with a change
in the if_flags: ether_set_ifflags_cb() registers a callback that
returns ENETRESET if the caller should reset the ethernet by calling
if_init(), 0 on success, != 0 on failure. Pull common code from
ex(4), gem(4), nfe(4), sip(4), tlp(4), vge(4) into ether_ioctl(),
and register if_flags callbacks for those drivers.
Return ENOTTY instead of EINVAL for inappropriate ioctls. In
zyd(4), use ENXIO instead of ENOTTY to indicate that the device is
not any longer attached.
Add to if_set_sadl() a boolean 'factory' argument that indicates
whether a link-layer address was assigned by the factory or some
other source. In a comment, recommend using the factory address
for generating an EUI64, and update in6_get_hw_ifid() to prefer a
factory address to any other link-layer address.
Add a routing message, RTM_LLINFO_UPD, that tells protocols to
update the binding of network-layer addresses to link-layer addresses.
Implement this message in IPv4 and IPv6 by sending a gratuitous
ARP or a neighbor advertisement, respectively. Generate RTM_LLINFO_UPD
messages on a change of an interface's link-layer address.
In ether_ioctl(), do not let SIOCALIFADDR set a link-layer address
that is broadcast/multicast or equal to 00:00:00:00:00:00.
Make ether_ioctl() call ifioctl_common() to handle ioctls that it
does not understand.
In gif(4), initialize if_softc and use it, instead of assuming that
the gif_softc and ifp overlap.
Let ifioctl_common() handle SIOCGIFADDR.
Sprinkle rtcache_invariants(), which checks on DIAGNOSTIC kernels
that certain invariants on a struct route are satisfied.
In agr(4), rewrite agr_ioctl_filter() to be a bit more explicit
about the ioctls that we do not allow on an agr(4) member interface.
bzero -> memset. Delete unnecessary casts to void *. Use
sockaddr_in_init() and sockaddr_in6_init(). Compare pointers with
NULL instead of "testing truth". Replace some instances of (type
*)0 with NULL. Change some K&R prototypes to ANSI C, and join
lines.
2008-11-07 03:20:01 +03:00
|
|
|
error = ether_ioctl(ifp, cmd, data);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
carp_hmac_prepare(sc);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Start output on a carp interface.
 *
 * This routine is not expected to run: carp_output() passes packets
 * straight to the parent device.  Kernels built with DEBUG log the
 * unexpected call; otherwise it does nothing.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
|
|
|
|
|
|
|
|
int
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
|
2016-04-28 03:16:56 +03:00
|
|
|
const struct rtentry *rt)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
|
|
|
struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
|
2014-05-13 23:36:16 +04:00
|
|
|
KASSERT(KERNEL_LOCKED_P());
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
|
2016-06-20 11:08:13 +03:00
|
|
|
return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
|
2006-05-18 13:05:49 +04:00
|
|
|
} else {
|
|
|
|
m_freem(m);
|
|
|
|
return (ENETUNREACH);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_set_state(struct carp_softc *sc, int state)
|
|
|
|
{
|
2009-05-28 01:32:31 +04:00
|
|
|
static const char *carp_states[] = { CARP_STATES };
|
2017-05-12 12:22:01 +03:00
|
|
|
int link_state;
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
if (sc->sc_state == state)
|
|
|
|
return;
|
|
|
|
|
2009-05-28 01:32:31 +04:00
|
|
|
CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
|
|
|
|
|
2006-05-18 13:05:49 +04:00
|
|
|
sc->sc_state = state;
|
|
|
|
switch (state) {
|
|
|
|
case BACKUP:
|
2017-05-12 12:22:01 +03:00
|
|
|
link_state = LINK_STATE_DOWN;
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
case MASTER:
|
2017-05-12 12:22:01 +03:00
|
|
|
link_state = LINK_STATE_UP;
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
default:
|
2017-05-12 12:22:01 +03:00
|
|
|
link_state = LINK_STATE_UNKNOWN;
|
2006-05-18 13:05:49 +04:00
|
|
|
break;
|
|
|
|
}
|
2017-12-06 12:54:47 +03:00
|
|
|
/*
|
|
|
|
* The lock is needed to serialize a call of
|
|
|
|
* if_link_state_change_softint from here and a call from softint.
|
|
|
|
*/
|
|
|
|
KERNEL_LOCK(1, NULL);
|
2017-05-19 11:53:51 +03:00
|
|
|
if_link_state_change_softint(&sc->sc_if, link_state);
|
2017-12-06 12:54:47 +03:00
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
carp_carpdev_state(void *v)
|
|
|
|
{
|
|
|
|
struct carp_if *cif;
|
|
|
|
struct carp_softc *sc;
|
|
|
|
struct ifnet *ifp = v;
|
|
|
|
|
|
|
|
if (ifp->if_type == IFT_CARP)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cif = (struct carp_if *)ifp->if_carp;
|
|
|
|
|
|
|
|
TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
|
|
|
|
int suppressed = sc->sc_suppress;
|
|
|
|
|
|
|
|
if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
|
|
|
|
!(sc->sc_carpdev->if_flags & IFF_UP)) {
|
|
|
|
sc->sc_if.if_flags &= ~IFF_RUNNING;
|
|
|
|
callout_stop(&sc->sc_ad_tmo);
|
|
|
|
callout_stop(&sc->sc_md_tmo);
|
|
|
|
callout_stop(&sc->sc_md6_tmo);
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
sc->sc_suppress = 1;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
if (!suppressed) {
|
|
|
|
carp_suppress_preempt++;
|
|
|
|
if (carp_suppress_preempt == 1)
|
|
|
|
carp_send_ad_all();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
carp_set_state(sc, INIT);
|
|
|
|
sc->sc_suppress = 0;
|
|
|
|
carp_setrun(sc, 0);
|
|
|
|
if (suppressed)
|
|
|
|
carp_suppress_preempt--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
|
|
|
|
{
|
2007-09-19 09:25:33 +04:00
|
|
|
const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
|
2006-05-18 13:05:49 +04:00
|
|
|
struct ifnet *ifp;
|
|
|
|
struct carp_mc_entry *mc;
|
|
|
|
u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
|
|
|
|
int error;
|
|
|
|
|
|
|
|
ifp = sc->sc_carpdev;
|
|
|
|
if (ifp == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2007-09-19 09:25:33 +04:00
|
|
|
error = ether_addmulti(sa, &sc->sc_ac);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (error != ENETRESET)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is new multicast address. We have to tell parent
|
|
|
|
* about it. Also, remember this multicast address so that
|
|
|
|
* we can delete them on unconfigure.
|
|
|
|
*/
|
2008-12-17 23:51:31 +03:00
|
|
|
mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (mc == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto alloc_failed;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* As ether_addmulti() returns ENETRESET, following two
|
|
|
|
* statement shouldn't fail.
|
|
|
|
*/
|
2007-09-19 09:25:33 +04:00
|
|
|
(void)ether_multiaddr(sa, addrlo, addrhi);
|
2018-06-14 11:06:07 +03:00
|
|
|
|
|
|
|
ETHER_LOCK(&sc->sc_ac);
|
2018-06-14 10:54:57 +03:00
|
|
|
mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
|
2018-06-14 11:06:07 +03:00
|
|
|
ETHER_UNLOCK(&sc->sc_ac);
|
|
|
|
|
2007-09-19 09:25:33 +04:00
|
|
|
memcpy(&mc->mc_addr, sa, sa->sa_len);
|
2006-05-18 13:05:49 +04:00
|
|
|
LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
|
|
|
|
|
2011-10-19 05:52:22 +04:00
|
|
|
error = if_mcast_op(ifp, SIOCADDMULTI, sa);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (error != 0)
|
|
|
|
goto ioctl_failed;
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
ioctl_failed:
|
|
|
|
LIST_REMOVE(mc, mc_entries);
|
2008-12-17 23:51:31 +03:00
|
|
|
free(mc, M_DEVBUF);
|
2006-05-18 13:05:49 +04:00
|
|
|
alloc_failed:
|
2007-09-19 09:25:33 +04:00
|
|
|
(void)ether_delmulti(sa, &sc->sc_ac);
|
2006-05-18 13:05:49 +04:00
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2014-07-31 04:56:23 +04:00
|
|
|
static int
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
|
|
|
|
{
|
2007-09-19 09:25:33 +04:00
|
|
|
const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
|
2006-05-18 13:05:49 +04:00
|
|
|
struct ifnet *ifp;
|
|
|
|
struct ether_multi *enm;
|
|
|
|
struct carp_mc_entry *mc;
|
|
|
|
u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
|
|
|
|
int error;
|
|
|
|
|
|
|
|
ifp = sc->sc_carpdev;
|
|
|
|
if (ifp == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find a key to lookup carp_mc_entry. We have to do this
|
|
|
|
* before calling ether_delmulti for obvious reason.
|
|
|
|
*/
|
2007-09-19 09:25:33 +04:00
|
|
|
if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
|
2006-05-18 13:05:49 +04:00
|
|
|
return (error);
|
2018-06-14 11:06:07 +03:00
|
|
|
|
|
|
|
ETHER_LOCK(&sc->sc_ac);
|
2018-06-14 10:54:57 +03:00
|
|
|
enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
|
2018-06-14 11:06:07 +03:00
|
|
|
ETHER_UNLOCK(&sc->sc_ac);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (enm == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
|
|
|
|
if (mc->mc_enm == enm)
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* We won't delete entries we didn't add */
|
|
|
|
if (mc == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2007-09-19 09:25:33 +04:00
|
|
|
error = ether_delmulti(sa, &sc->sc_ac);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (error != ENETRESET)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/* We no longer use this multicast address. Tell parent so. */
|
2011-10-19 05:52:22 +04:00
|
|
|
error = if_mcast_op(ifp, SIOCDELMULTI, sa);
|
2006-05-18 13:05:49 +04:00
|
|
|
if (error == 0) {
|
|
|
|
/* And forget about this address. */
|
|
|
|
LIST_REMOVE(mc, mc_entries);
|
2008-12-17 23:51:31 +03:00
|
|
|
free(mc, M_DEVBUF);
|
2006-05-18 13:05:49 +04:00
|
|
|
} else
|
2007-09-19 09:25:33 +04:00
|
|
|
(void)ether_addmulti(sa, &sc->sc_ac);
|
2006-05-18 13:05:49 +04:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete any multicast address we have asked to add from parent
|
|
|
|
* interface. Called when the carp is being unconfigured.
|
|
|
|
*/
|
2014-07-31 04:56:23 +04:00
|
|
|
static void
|
2006-05-18 13:05:49 +04:00
|
|
|
carp_ether_purgemulti(struct carp_softc *sc)
|
|
|
|
{
|
|
|
|
struct ifnet *ifp = sc->sc_carpdev; /* Parent. */
|
|
|
|
struct carp_mc_entry *mc;
|
|
|
|
|
|
|
|
if (ifp == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
|
2011-10-19 05:52:22 +04:00
|
|
|
(void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
|
2006-05-18 13:05:49 +04:00
|
|
|
LIST_REMOVE(mc, mc_entries);
|
2008-12-17 23:51:31 +03:00
|
|
|
free(mc, M_DEVBUF);
|
2006-05-18 13:05:49 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-15 10:03:28 +04:00
|
|
|
static int
|
|
|
|
sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
|
2008-05-04 11:22:14 +04:00
|
|
|
return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
|
2008-04-15 10:03:28 +04:00
|
|
|
}
|
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
void
|
|
|
|
carp_init(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
sysctl_net_inet_carp_setup(NULL);
|
2014-04-04 16:53:04 +04:00
|
|
|
#ifdef MBUFTRACE
|
|
|
|
MOWNER_ATTACH(&carp_proto_mowner_rx);
|
|
|
|
MOWNER_ATTACH(&carp_proto_mowner_tx);
|
|
|
|
MOWNER_ATTACH(&carp_proto6_mowner_rx);
|
|
|
|
MOWNER_ATTACH(&carp_proto6_mowner_tx);
|
|
|
|
#endif
|
2017-02-02 05:52:10 +03:00
|
|
|
|
|
|
|
carp_wqinput = wqinput_create("carp", _carp_proto_input);
|
|
|
|
#ifdef INET6
|
|
|
|
carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
|
|
|
|
#endif
|
2009-09-16 19:23:04 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
sysctl_net_inet_carp_setup(struct sysctllog **clog)
|
2006-05-18 13:05:49 +04:00
|
|
|
{
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "inet", NULL,
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "carp",
|
|
|
|
SYSCTL_DESCR("CARP related settings"),
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "preempt",
|
|
|
|
SYSCTL_DESCR("Enable CARP Preempt"),
|
|
|
|
NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
|
|
|
|
CTL_NET, PF_INET, IPPROTO_CARP,
|
|
|
|
CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "arpbalance",
|
|
|
|
SYSCTL_DESCR("Enable ARP balancing"),
|
|
|
|
NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
|
|
|
|
CTL_NET, PF_INET, IPPROTO_CARP,
|
|
|
|
CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "allow",
|
|
|
|
SYSCTL_DESCR("Enable CARP"),
|
|
|
|
NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
|
|
|
|
CTL_NET, PF_INET, IPPROTO_CARP,
|
|
|
|
CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "log",
|
|
|
|
SYSCTL_DESCR("CARP logging"),
|
|
|
|
NULL, 0, &carp_opts[CARPCTL_LOG], 0,
|
|
|
|
CTL_NET, PF_INET, IPPROTO_CARP,
|
|
|
|
CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_STRUCT, "stats",
|
2006-05-25 19:22:05 +04:00
|
|
|
SYSCTL_DESCR("CARP statistics"),
|
2008-04-15 10:03:28 +04:00
|
|
|
sysctl_net_inet_carp_stats, 0, NULL, 0,
|
2006-05-18 13:05:49 +04:00
|
|
|
CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
|
|
|
|
CTL_EOL);
|
|
|
|
}
|