NetBSD/sys/netinet/in_gif.c
knakahara 2da350beca Avoid having a rtcache directly in a percpu storage for tunnel protocols.
percpu(9) has a certain memory storage for each CPU and provides it by the piece
to users.  If the storages went short, percpu(9) enlarges them by allocating new
larger memory areas, replacing old ones with them and destroying the old ones.
A percpu storage referenced by a pointer gotten via percpu_getref can be
destroyed by the mechanism after a running thread sleeps even if percpu_putref
has not been called.

Using rtcache, i.e., packet processing, typically involves sleepable operations
such as rwlock so we must avoid dereferencing a rtcache that is directly stored
in a percpu storage during packet processing.  Address this situation by having
just a pointer to a rtcache in a percpu storage instead.

Reviewed by ozaki-r@ and yamaguchi@
2019-09-19 06:07:24 +00:00

400 lines
9.7 KiB
C

/* $NetBSD: in_gif.c,v 1.95 2019/09/19 06:07:25 knakahara Exp $ */
/* $KAME: in_gif.c,v 1.66 2001/07/29 04:46:09 itojun Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: in_gif.c,v 1.95 2019/09/19 06:07:25 knakahara Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/ioctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_gif.h>
#include <netinet/in_var.h>
#include <netinet/ip_encap.h>
#include <netinet/ip_ecn.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <net/if_gif.h>
/* Forward declaration; the definition appears later in this file. */
static int gif_validate4(const struct ip *, struct gif_variant *,
struct ifnet *);
/*
 * TTL that in_gif_output() writes into the outer IPv4 header.
 * Initialized to GIF_TTL.
 */
int ip_gif_ttl = GIF_TTL;
/*
 * Encapsulation switch handed to encap_attach()/encap_attach_func() by
 * in_gif_attach().  in_gif_input() is the input handler; no ctlinput
 * handler is installed.
 */
static const struct encapsw in_gif_encapsw = {
.encapsw4 = {
.pr_input = in_gif_input,
.pr_ctlinput = NULL,
}
};
/*
 * Encapsulate an inner IPv4 or IPv6 packet in an outer IPv4 header and
 * send it with ip_output().
 *
 * var:    tunnel variant; the KASSERT below expects the caller to hold a
 *         reference on it.
 * family: address family of the inner packet (AF_INET or AF_INET6).
 * m:      inner packet.  The caller must not use it after the call: it is
 *         freed on the error paths handled here, is already NULL after a
 *         failed m_pullup()/M_PREPEND(), and is otherwise handed to
 *         ip_output().
 *
 * Returns 0 or an errno value:
 *   EAFNOSUPPORT  tunnel endpoints are not both AF_INET, or the inner
 *                 family is not supported;
 *   ENOBUFS       mbuf space for a header could not be obtained;
 *   ENETUNREACH   the tunnel destination is INADDR_ANY, has no route, or
 *                 its route points back at this gif interface;
 *   otherwise the result of ip_output().
 */
static int
in_gif_output(struct gif_variant *var, int family, struct mbuf *m)
{
	struct rtentry *rt;
	struct gif_softc *sc;
	struct sockaddr_in *sin_src;
	struct sockaddr_in *sin_dst;
	struct ifnet *ifp;
	struct route *ro_pc;
	kmutex_t *lock_pc;
	struct ip iphdr;	/* template for the outer IP header */
	int proto, error, looped;
	u_int8_t tos;

	KASSERT(gif_heldref_variant(var));

	sc = var->gv_softc;
	ifp = &sc->gif_if;
	sin_src = satosin(var->gv_psrc);
	sin_dst = satosin(var->gv_pdst);

	/* Both tunnel endpoints must be configured and must be IPv4. */
	if (sin_src == NULL || sin_dst == NULL ||
	    sin_src->sin_family != AF_INET ||
	    sin_dst->sin_family != AF_INET) {
		m_freem(m);
		return EAFNOSUPPORT;
	}

	/* Pick the outer protocol number and fetch the inner TOS byte. */
	switch (family) {
#ifdef INET
	case AF_INET:
	    {
		const struct ip *ip;

		proto = IPPROTO_IPV4;
		if (m->m_len < sizeof(*ip)) {
			m = m_pullup(m, sizeof(*ip));
			if (m == NULL)
				return ENOBUFS;
		}
		ip = mtod(m, const struct ip *);
		tos = ip->ip_tos;
		break;
	    }
#endif /* INET */
#ifdef INET6
	case AF_INET6:
	    {
		const struct ip6_hdr *ip6;

		proto = IPPROTO_IPV6;
		if (m->m_len < sizeof(*ip6)) {
			m = m_pullup(m, sizeof(*ip6));
			if (m == NULL)
				return ENOBUFS;
		}
		ip6 = mtod(m, const struct ip6_hdr *);
		/* traffic class: 8 bits starting at bit 20 of the flow word */
		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
		break;
	    }
#endif /* INET6 */
	default:
#ifdef DEBUG
		printf("in_gif_output: warning: unknown family %d passed\n",
		    family);
#endif
		m_freem(m);
		return EAFNOSUPPORT;
	}

	/* bidirectional configured tunnel mode: a real destination is needed */
	if (sin_dst->sin_addr.s_addr == INADDR_ANY) {
		m_freem(m);
		return ENETUNREACH;
	}

	memset(&iphdr, 0, sizeof(iphdr));
	iphdr.ip_src = sin_src->sin_addr;
	iphdr.ip_dst = sin_dst->sin_addr;
	iphdr.ip_p = proto;
	/* version will be set in ip_output() */
	iphdr.ip_ttl = ip_gif_ttl;
	/* total length covers the inner packet plus the header added below */
	iphdr.ip_len = htons(m->m_pkthdr.len + sizeof(struct ip));
	/* IFF_LINK1 on the gif interface selects ECN_ALLOWED over ECN_NOCARE */
	ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
	    &iphdr.ip_tos, &tos);

	/* prepend new IP header */
	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
	/* XXX Is m_pullup really necessary after M_PREPEND? */
	if (m != NULL && M_UNWRITABLE(m, sizeof(struct ip)))
		m = m_pullup(m, sizeof(struct ip));
	if (m == NULL)
		return ENOBUFS;
	/* source and destination cannot overlap: stack object vs. mbuf data */
	memcpy(mtod(m, struct ip *), &iphdr, sizeof(iphdr));

	/*
	 * Borrow this tunnel's route cache.  From here on every exit must
	 * hand it back with if_tunnel_put_ro().
	 */
	if_tunnel_get_ro(sc->gif_ro_percpu, &ro_pc, &lock_pc);
	rt = rtcache_lookup(ro_pc, var->gv_pdst);
	if (rt == NULL)
		goto unreachable;

	/* If the route constitutes infinite encapsulation, punt. */
	looped = (rt->rt_ifp == ifp);
	rtcache_unref(rt, ro_pc);
	if (looped) {
		rtcache_free(ro_pc);
		goto unreachable;	/*XXX*/
	}

	error = ip_output(m, NULL, ro_pc, 0, NULL, NULL);
	if_tunnel_put_ro(sc->gif_ro_percpu, lock_pc);
	return (error);

unreachable:
	if_tunnel_put_ro(sc->gif_ro_percpu, lock_pc);
	m_freem(m);
	return ENETUNREACH;
}
void
in_gif_input(struct mbuf *m, int off, int proto, void *eparg)
{
struct gif_softc *sc = eparg;
struct ifnet *gifp = &sc->gif_if;
const struct ip *ip;
int af;
u_int8_t otos;
KASSERT(sc != NULL);
ip = mtod(m, const struct ip *);
gifp = &sc->gif_if;
if ((gifp->if_flags & IFF_UP) == 0) {
m_freem(m);
ip_statinc(IP_STAT_NOGIF);
return;
}
#ifndef GIF_ENCAPCHECK
struct psref psref_var;
struct gif_variant *var = gif_getref_variant(sc, &psref_var);
/* other CPU do delete_tunnel */
if (var->gv_psrc == NULL || var->gv_pdst == NULL) {
gif_putref_variant(var, &psref_var);
m_freem(m);
ip_statinc(IP_STAT_NOGIF);
return;
}
struct ifnet *rcvif;
struct psref psref_rcvif;
rcvif = m_get_rcvif_psref(m, &psref_rcvif);
if (!gif_validate4(ip, var, rcvif)) {
m_put_rcvif_psref(rcvif, &psref_rcvif);
gif_putref_variant(var, &psref_var);
m_freem(m);
ip_statinc(IP_STAT_NOGIF);
return;
}
m_put_rcvif_psref(rcvif, &psref_rcvif);
gif_putref_variant(var, &psref_var);
#endif
otos = ip->ip_tos;
m_adj(m, off);
switch (proto) {
#ifdef INET
case IPPROTO_IPV4:
{
struct ip *xip;
af = AF_INET;
if (M_UNWRITABLE(m, sizeof(*xip))) {
if ((m = m_pullup(m, sizeof(*xip))) == NULL)
return;
}
xip = mtod(m, struct ip *);
if (gifp->if_flags & IFF_LINK1)
ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
else
ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
break;
}
#endif
#ifdef INET6
case IPPROTO_IPV6:
{
struct ip6_hdr *ip6;
u_int8_t itos;
af = AF_INET6;
if (M_UNWRITABLE(m, sizeof(*ip6))) {
if ((m = m_pullup(m, sizeof(*ip6))) == NULL)
return;
}
ip6 = mtod(m, struct ip6_hdr *);
itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
if (gifp->if_flags & IFF_LINK1)
ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
else
ip_ecn_egress(ECN_NOCARE, &otos, &itos);
ip6->ip6_flow &= ~htonl(0xff << 20);
ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
break;
}
#endif /* INET6 */
default:
ip_statinc(IP_STAT_NOGIF);
m_freem(m);
return;
}
gif_input(m, af, gifp);
return;
}
/*
 * Validate the outer IPv4 header of a received packet against a tunnel.
 *
 * ip:  outer IPv4 header.
 * var: tunnel variant; gv_psrc and gv_pdst are dereferenced without a
 *      NULL check, so the caller must have verified them.
 * ifp: interface the packet arrived on, or NULL to skip the ingress
 *      filter.
 *
 * Returns 0 if the packet is rejected, otherwise the non-zero value
 * produced by in_tunnel_validate().
 */
static int
gif_validate4(const struct ip *ip, struct gif_variant *var, struct ifnet *ifp)
{
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
	} u;
	struct sockaddr_in *src, *dst;
	struct rtentry *rt;
	int ret, from_rcvif;

	src = satosin(var->gv_psrc);
	dst = satosin(var->gv_pdst);

	ret = in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
	if (ret == 0)
		return 0;

	/*
	 * Ingress filter on the outer source.  It is skipped when IFF_LINK2
	 * is set on the gif interface or when no receive interface is known.
	 */
	if ((var->gv_softc->gif_if.if_flags & IFF_LINK2) != 0 || ifp == NULL)
		return ret;

	/* The route towards the outer source must use the receiving interface. */
	sockaddr_in_init(&u.sin, &ip->ip_src, 0);
	rt = rtalloc1(&u.sa, 0);
	from_rcvif = (rt != NULL && rt->rt_ifp == ifp);
	if (rt != NULL)
		rt_unref(rt);

	if (!from_rcvif) {
#if 0
		log(LOG_WARNING, "%s: packet from 0x%x dropped "
		    "due to ingress filter\n",
		    if_name(&var->gv_softc->gif_if),
		    (u_int32_t)ntohl(u.sin.sin_addr.s_addr));
#endif
		return 0;
	}

	return ret;
}
#ifdef GIF_ENCAPCHECK
/*
 * IPv4 part of the encapsulation check; only built with GIF_ENCAPCHECK.
 *
 * The caller has already established that the interface is IFF_UP, that
 * the outer addresses are available, and that the outer family matches
 * the physical address family.  See gif_encapcheck().
 *
 * A copy of the outer IPv4 header is taken from the start of m and
 * checked with gif_validate4(), whose result is returned unchanged.
 * The receive interface is looked up only when m carries a packet
 * header.  `off' and `proto' are not used.
 */
int
gif_encapcheck4(struct mbuf *m, int off, int proto, struct gif_variant *var)
{
	struct psref rcvif_psref;
	struct ifnet *rcvif;
	struct ip outer;
	int result;

	m_copydata(m, 0, sizeof(outer), &outer);

	rcvif = ((m->m_flags & M_PKTHDR) != 0) ?
	    m_get_rcvif_psref(m, &rcvif_psref) : NULL;

	result = gif_validate4(&outer, var, rcvif);
	/* NOTE(review): also called with rcvif == NULL; confirm the helper accepts that. */
	m_put_rcvif_psref(rcvif, &rcvif_psref);

	return result;
}
#endif
/*
 * Register the tunnel variant with the IPv4 encapsulation layer and
 * install in_gif_output() as its output routine.
 *
 * Without GIF_ENCAPCHECK the registration is keyed on the tunnel's
 * physical source and destination, both with an all-ones address mask;
 * with GIF_ENCAPCHECK the gif_encapcheck callback is registered instead.
 *
 * Returns 0 on success, EINVAL when a physical address is missing (only
 * checked without GIF_ENCAPCHECK), or EEXIST when the encapsulation layer
 * returns no cookie.
 */
int
in_gif_attach(struct gif_variant *var)
{
#ifdef GIF_ENCAPCHECK
	var->gv_encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
	    &in_gif_encapsw, var->gv_softc);
#else
	struct sockaddr_in mask4;

	if (var->gv_psrc == NULL || var->gv_pdst == NULL)
		return EINVAL;

	/* all-ones address mask, used for both source and destination */
	memset(&mask4, 0, sizeof(mask4));
	mask4.sin_len = sizeof(mask4);
	mask4.sin_addr.s_addr = ~0;

	var->gv_encap_cookie4 = encap_attach(AF_INET, -1,
	    var->gv_psrc, (struct sockaddr *)&mask4,
	    var->gv_pdst, (struct sockaddr *)&mask4,
	    &in_gif_encapsw, var->gv_softc);
#endif

	if (var->gv_encap_cookie4 == NULL)
		return EEXIST;

	var->gv_output = in_gif_output;
	return 0;
}
int
in_gif_detach(struct gif_variant *var)
{
int error;
struct gif_softc *sc = var->gv_softc;
error = encap_detach(var->gv_encap_cookie4);
if (error == 0)
var->gv_encap_cookie4 = NULL;
if_tunnel_ro_percpu_rtcache_free(sc->gif_ro_percpu);
return error;
}