From 6ea8c2e666c31db6af559e3fb8e22452d94d96d4 Mon Sep 17 00:00:00 2001 From: ozaki-r Date: Thu, 4 Jun 2015 09:19:59 +0000 Subject: [PATCH] Pull out route lookups from L2 output routines Route lookups for RTF_GATEWAY routes were done in L2 output routines such as ether_output, but they should be done in L3, i.e., before the L2 output routines. This change places the lookups between the L3 output routines (say ip_output) and the L2 output routines. The change is based on dyoung's patch submitted in the thread: https://mail-index.netbsd.org/tech-net/2013/02/01/msg003847.html You can find dyoung's detailed investigation of the issue there. Note that the change introduces a workaround for MPLS. ether_output used to know that it needed to set the ethertype of a frame to MPLS, based on the tag of the original route (rtentry), but now we don't pass that route to ether_output. So we have to convey that in another way. For now we use an mbuf tag (mtag) to do so, which introduces some overhead. We should fix it somehow in the future. Discussed on tech-kern and tech-net. 
--- sys/net/if_arcsubr.c | 30 +------ sys/net/if_atmsubr.c | 34 +------- sys/net/if_ecosubr.c | 39 +-------- sys/net/if_ethersubr.c | 54 +++--------- sys/net/if_fddisubr.c | 33 +------ sys/net/if_hippisubr.c | 40 +-------- sys/net/if_ieee1394subr.c | 41 +-------- sys/net/if_mpls.c | 9 +- sys/netinet/in_offload.c | 11 +-- sys/netinet/ip_output.c | 178 ++++++++++++++++++++++++++++++++++---- sys/netinet/ip_var.h | 5 +- sys/sys/mbuf.h | 4 +- 12 files changed, 210 insertions(+), 268 deletions(-) diff --git a/sys/net/if_arcsubr.c b/sys/net/if_arcsubr.c index b5aab2a72423..d2e5cc2c3c3e 100644 --- a/sys/net/if_arcsubr.c +++ b/sys/net/if_arcsubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_arcsubr.c,v 1.66 2014/06/05 23:48:16 rmind Exp $ */ +/* $NetBSD: if_arcsubr.c,v 1.67 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (c) 1994, 1995 Ignatios Souvatzis @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_arcsubr.c,v 1.66 2014/06/05 23:48:16 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_arcsubr.c,v 1.67 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" @@ -111,10 +111,9 @@ static void arc_input(struct ifnet *, struct mbuf *); */ static int arc_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, - struct rtentry *rt0) + struct rtentry *rt) { struct mbuf *m, *m1, *mcopy; - struct rtentry *rt; struct arccom *ac; const struct arc_header *cah; struct arc_header *ah; @@ -134,29 +133,6 @@ arc_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, myself = *CLLADDR(ifp->if_sadl); - if ((rt = rt0)) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1(dst, 1))) - rt->rt_refcnt--; - else - senderr(EHOSTUNREACH); - } - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); - if ((rt = rt->rt_gwroute) == 0) - senderr(EHOSTUNREACH); - } - } - if (rt->rt_flags & RTF_REJECT) 
- if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } - /* * if the queueing discipline needs packet classification, * do it before prepending link headers. diff --git a/sys/net/if_atmsubr.c b/sys/net/if_atmsubr.c index 899a9babcade..11ea1f062626 100644 --- a/sys/net/if_atmsubr.c +++ b/sys/net/if_atmsubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_atmsubr.c,v 1.52 2014/06/05 23:48:16 rmind Exp $ */ +/* $NetBSD: if_atmsubr.c,v 1.53 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (c) 1996 Charles D. Cranor and Washington University. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_atmsubr.c,v 1.52 2014/06/05 23:48:16 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_atmsubr.c,v 1.53 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" #include "opt_gateway.h" @@ -89,13 +89,12 @@ __KERNEL_RCSID(0, "$NetBSD: if_atmsubr.c,v 1.52 2014/06/05 23:48:16 rmind Exp $" int atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, - struct rtentry *rt0) + struct rtentry *rt) { uint16_t etype = 0; /* if using LLC/SNAP */ int error = 0, sz; struct atm_pseudohdr atmdst, *ad; struct mbuf *m = m0; - struct rtentry *rt; struct atmllc *atmllc; uint32_t atm_flags; ALTQ_DECL(struct altq_pktattr pktattr;) @@ -110,33 +109,6 @@ atm_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, IFQ_CLASSIFY(&ifp->if_snd, m, (dst != NULL ? dst->sa_family : AF_UNSPEC), &pktattr); - /* - * check route - */ - if ((rt = rt0) != NULL) { - - if ((rt->rt_flags & RTF_UP) == 0) { /* route went down! 
*/ - if ((rt0 = rt = RTALLOC1(dst, 0)) != NULL) - rt->rt_refcnt--; - else - senderr(EHOSTUNREACH); - } - - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = RTALLOC1(rt->rt_gateway, 0); - if ((rt = rt->rt_gwroute) == 0) - senderr(EHOSTUNREACH); - } - } - - /* XXX: put RTF_REJECT code here if doing ATMARP */ - - } - /* * check for non-native ATM traffic (dst != NULL) */ diff --git a/sys/net/if_ecosubr.c b/sys/net/if_ecosubr.c index 0337a6fa0946..df123e9db517 100644 --- a/sys/net/if_ecosubr.c +++ b/sys/net/if_ecosubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_ecosubr.c,v 1.42 2015/05/20 09:17:18 ozaki-r Exp $ */ +/* $NetBSD: if_ecosubr.c,v 1.43 2015/06/04 09:19:59 ozaki-r Exp $ */ /*- * Copyright (c) 2001 Ben Harris @@ -58,7 +58,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_ecosubr.c,v 1.42 2015/05/20 09:17:18 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_ecosubr.c,v 1.43 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" @@ -161,12 +161,11 @@ eco_stop(struct ifnet *ifp, int disable) static int eco_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, - struct rtentry *rt0) + struct rtentry *rt) { struct eco_header ehdr, *eh; int error; struct mbuf *m = m0, *mcopy = NULL; - struct rtentry *rt; int hdrcmplt; int retry_delay, retry_count; struct m_tag *mtag; @@ -181,38 +180,6 @@ eco_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) senderr(ENETDOWN); - if ((rt = rt0) != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) { - rt->rt_refcnt--; - if (rt->rt_ifp != ifp) - return (*rt->rt_ifp->if_output) - (ifp, m0, dst, rt); - } else - senderr(EHOSTUNREACH); - } - if ((rt->rt_flags & RTF_GATEWAY)) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) 
{ - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); - if ((rt = rt->rt_gwroute) == 0) - senderr(EHOSTUNREACH); - /* the "G" test below also prevents rt == rt0 */ - if ((rt->rt_flags & RTF_GATEWAY) || - (rt->rt_ifp != ifp)) { - rt->rt_refcnt--; - rt0->rt_gwroute = 0; - senderr(EHOSTUNREACH); - } - } - } - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } /* * If the queueing discipline needs packet classification, * do it before prepending link headers. diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 0b8e6c28ea7c..2aa05847a58f 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_ethersubr.c,v 1.209 2015/05/25 08:29:01 ozaki-r Exp $ */ +/* $NetBSD: if_ethersubr.c,v 1.210 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -61,7 +61,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.209 2015/05/25 08:29:01 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.210 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" #include "opt_atalk.h" @@ -192,13 +192,12 @@ static int ether_output(struct ifnet *, struct mbuf *, static int ether_output(struct ifnet * const ifp0, struct mbuf * const m0, const struct sockaddr * const dst, - struct rtentry *rt0) + struct rtentry *rt) { uint16_t etype = 0; int error = 0, hdrcmplt = 0; uint8_t esrc[6], edst[6]; struct mbuf *m = m0; - struct rtentry *rt; struct mbuf *mcopy = NULL; struct ether_header *eh; struct ifnet *ifp = ifp0; @@ -226,7 +225,7 @@ ether_output(struct ifnet * const ifp0, struct mbuf * const m0, if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP && (ifa = ifa_ifwithaddr(dst)) != NULL && ifa->ifa_ifp == ifp0) - return looutput(ifp0, m, dst, rt0); + return looutput(ifp0, m, dst, rt); ifp = ifp->if_carpdev; /* ac = (struct arpcom *)ifp; */ @@ 
-239,38 +238,6 @@ ether_output(struct ifnet * const ifp0, struct mbuf * const m0, if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) senderr(ENETDOWN); - if ((rt = rt0) != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) { - rt->rt_refcnt--; - if (rt->rt_ifp != ifp) - return (*rt->rt_ifp->if_output) - (ifp, m0, dst, rt); - } else - senderr(EHOSTUNREACH); - } - if ((rt->rt_flags & RTF_GATEWAY)) { - if (rt->rt_gwroute == NULL) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); - if ((rt = rt->rt_gwroute) == NULL) - senderr(EHOSTUNREACH); - /* the "G" test below also prevents rt == rt0 */ - if ((rt->rt_flags & RTF_GATEWAY) || - (rt->rt_ifp != ifp)) { - rt->rt_refcnt--; - rt0->rt_gwroute = NULL; - senderr(EHOSTUNREACH); - } - } - } - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - (u_long) time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? 
EHOSTDOWN : EHOSTUNREACH); - } switch (dst->sa_family) { @@ -386,13 +353,14 @@ ether_output(struct ifnet * const ifp0, struct mbuf * const m0, } #ifdef MPLS - if (rt0 != NULL && rt_gettag(rt0) != NULL && - rt_gettag(rt0)->sa_family == AF_MPLS && - (m->m_flags & (M_MCAST | M_BCAST)) == 0) { - union mpls_shim msh; - msh.s_addr = MPLS_GETSADDR(rt0); - if (msh.shim.label != MPLS_LABEL_IMPLNULL) + { + struct m_tag *mtag; + mtag = m_tag_find(m, PACKET_TAG_MPLS, NULL); + if (mtag != NULL) { + /* Having the tag itself indicates it's MPLS */ etype = htons(ETHERTYPE_MPLS); + m_tag_delete(m, mtag); + } } #endif diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c index 5b4bf1dd6b74..2e7a4596a837 100644 --- a/sys/net/if_fddisubr.c +++ b/sys/net/if_fddisubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_fddisubr.c,v 1.91 2015/05/25 08:31:34 ozaki-r Exp $ */ +/* $NetBSD: if_fddisubr.c,v 1.92 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -96,7 +96,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_fddisubr.c,v 1.91 2015/05/25 08:31:34 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_fddisubr.c,v 1.92 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_gateway.h" #include "opt_inet.h" @@ -185,13 +185,12 @@ static void fddi_input(struct ifnet *, struct mbuf *); */ static int fddi_output(struct ifnet *ifp0, struct mbuf *m0, const struct sockaddr *dst, - struct rtentry *rt0) + struct rtentry *rt) { uint16_t etype; int error = 0, hdrcmplt = 0; uint8_t esrc[6], edst[6]; struct mbuf *m = m0; - struct rtentry *rt; struct fddi_header *fh; struct mbuf *mcopy = NULL; struct ifnet *ifp = ifp0; @@ -207,7 +206,7 @@ fddi_output(struct ifnet *ifp0, struct mbuf *m0, const struct sockaddr *dst, if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP && (ifa = ifa_ifwithaddr(dst)) != NULL && ifa->ifa_ifp == ifp0) - return (looutput(ifp0, m, dst, rt0)); + return (looutput(ifp0, m, dst, rt)); ifp = ifp->if_carpdev; /* ac = (struct arpcom *)ifp; */ @@ 
-219,30 +218,6 @@ fddi_output(struct ifnet *ifp0, struct mbuf *m0, const struct sockaddr *dst, #endif /* NCARP > 0 */ if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) senderr(ENETDOWN); -#if !defined(__bsdi__) || _BSDI_VERSION >= 199401 - if ((rt = rt0) != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) - rt->rt_refcnt--; - else - senderr(EHOSTUNREACH); - } - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); - if ((rt = rt->rt_gwroute) == 0) - senderr(EHOSTUNREACH); - } - } - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } -#endif /* * If the queueing discipline needs packet classification, diff --git a/sys/net/if_hippisubr.c b/sys/net/if_hippisubr.c index ff0cda9519de..07e4cf9b9bf9 100644 --- a/sys/net/if_hippisubr.c +++ b/sys/net/if_hippisubr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_hippisubr.c,v 1.42 2015/05/20 09:17:18 ozaki-r Exp $ */ +/* $NetBSD: if_hippisubr.c,v 1.43 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (c) 1982, 1989, 1993 @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_hippisubr.c,v 1.42 2015/05/20 09:17:18 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_hippisubr.c,v 1.43 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" @@ -83,13 +83,12 @@ static void hippi_input(struct ifnet *, struct mbuf *); static int hippi_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, - struct rtentry *rt0) + struct rtentry *rt) { uint16_t htype; uint32_t ifield = 0; int error = 0; struct mbuf *m = m0; - struct rtentry *rt; struct hippi_header *hh; uint32_t *cci; uint32_t d2_len; @@ -102,39 +101,6 @@ hippi_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, if (m->m_flags & 
(M_BCAST | M_MCAST)) senderr(EOPNOTSUPP); /* XXX: some other error? */ - if ((rt = rt0) != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) { - rt->rt_refcnt--; - if (rt->rt_ifp != ifp) - return (*rt->rt_ifp->if_output) - (ifp, m0, dst, rt); - } else - senderr(EHOSTUNREACH); - } - if ((rt->rt_flags & RTF_GATEWAY)) { - if (rt->rt_gwroute == 0) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); rt = rt0; - lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); - if ((rt = rt->rt_gwroute) == 0) - senderr(EHOSTUNREACH); - /* the "G" test below also prevents rt == rt0 */ - if ((rt->rt_flags & RTF_GATEWAY) || - (rt->rt_ifp != ifp)) { - rt->rt_refcnt--; - rt0->rt_gwroute = 0; - senderr(EHOSTUNREACH); - } - } - } - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || /* XXX: no ARP */ - time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH); - } - /* * If the queueing discipline needs packet classification, * do it before prepending link headers. diff --git a/sys/net/if_ieee1394subr.c b/sys/net/if_ieee1394subr.c index cea0249cda0a..81bdbfd6430c 100644 --- a/sys/net/if_ieee1394subr.c +++ b/sys/net/if_ieee1394subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_ieee1394subr.c,v 1.48 2014/11/28 08:29:00 ozaki-r Exp $ */ +/* $NetBSD: if_ieee1394subr.c,v 1.49 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (c) 2000 The NetBSD Foundation, Inc. 
@@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_ieee1394subr.c,v 1.48 2014/11/28 08:29:00 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_ieee1394subr.c,v 1.49 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" @@ -82,12 +82,11 @@ static struct mbuf *ieee1394_reass(struct ifnet *, struct mbuf *, uint16_t); static int ieee1394_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, - struct rtentry *rt0) + struct rtentry *rt) { uint16_t etype = 0; struct mbuf *m; int s, hdrlen, error = 0; - struct rtentry *rt; struct mbuf *mcopy = NULL; struct ieee1394_hwaddr *hwdst, baddr; const struct ieee1394_hwaddr *myaddr; @@ -100,40 +99,6 @@ ieee1394_output(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) senderr(ENETDOWN); - if ((rt = rt0) != NULL) { - if ((rt->rt_flags & RTF_UP) == 0) { - if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) { - rt->rt_refcnt--; - if (rt->rt_ifp != ifp) - return (*rt->rt_ifp->if_output) - (ifp, m0, dst, rt); - } else - senderr(EHOSTUNREACH); - } - if (rt->rt_flags & RTF_GATEWAY) { - if (rt->rt_gwroute == NULL) - goto lookup; - if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) { - rtfree(rt); - rt = rt0; - lookup: - rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); - if ((rt = rt->rt_gwroute) == NULL) - senderr(EHOSTUNREACH); - /* the "G" test below also prevents rt == rt0 */ - if ((rt->rt_flags & RTF_GATEWAY) || - (rt->rt_ifp != ifp)) { - rt->rt_refcnt--; - rt0->rt_gwroute = NULL; - senderr(EHOSTUNREACH); - } - } - } - if (rt->rt_flags & RTF_REJECT) - if (rt->rt_rmx.rmx_expire == 0 || - time_second < rt->rt_rmx.rmx_expire) - senderr(rt == rt0 ? 
EHOSTDOWN : EHOSTUNREACH); - } /* * If the queueing discipline needs packet classification, diff --git a/sys/net/if_mpls.c b/sys/net/if_mpls.c index 4144a646107c..cacd7e6845fd 100644 --- a/sys/net/if_mpls.c +++ b/sys/net/if_mpls.c @@ -1,4 +1,4 @@ -/* $NetBSD: if_mpls.c,v 1.16 2014/07/17 10:46:57 bouyer Exp $ */ +/* $NetBSD: if_mpls.c,v 1.17 2015/06/04 09:19:59 ozaki-r Exp $ */ /* * Copyright (c) 2010 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.16 2014/07/17 10:46:57 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.17 2015/06/04 09:19:59 ozaki-r Exp $"); #include "opt_inet.h" #include "opt_mpls.h" @@ -53,6 +53,7 @@ __KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.16 2014/07/17 10:46:57 bouyer Exp $"); #include #include #include +#include #endif #ifdef INET6 @@ -469,9 +470,13 @@ mpls_send_frame(struct mbuf *m, struct ifnet *ifp, struct rtentry *rt) case IFT_ETHER: case IFT_TUNNEL: case IFT_LOOP: +#ifdef INET + ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt); +#else KERNEL_LOCK(1, NULL); ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); KERNEL_UNLOCK_ONE(NULL); +#endif return ret; break; default: diff --git a/sys/netinet/in_offload.c b/sys/netinet/in_offload.c index 899222c9f4d0..c793c5bad2d5 100644 --- a/sys/netinet/in_offload.c +++ b/sys/netinet/in_offload.c @@ -1,4 +1,4 @@ -/* $NetBSD: in_offload.c,v 1.5 2011/04/25 22:11:31 yamt Exp $ */ +/* $NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $ */ /*- * Copyright (c)2005, 2006 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.5 2011/04/25 22:11:31 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $"); #include #include @@ -37,6 +37,7 @@ __KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.5 2011/04/25 22:11:31 yamt Exp $"); #include #include #include +#include #include #include @@ -53,12 +54,8 @@ ip_tso_output_callback(void *vp, struct mbuf *m) { struct 
ip_tso_output_args *args = vp; struct ifnet *ifp = args->ifp; - int error; - KERNEL_LOCK(1, NULL); - error = (*ifp->if_output)(ifp, m, args->sa, args->rt); - KERNEL_UNLOCK_ONE(NULL); - return error; + return ip_hresolv_output(ifp, m, args->sa, args->rt); } int diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 36f9ad1349a0..eef7816143a3 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -1,4 +1,4 @@ -/* $NetBSD: ip_output.c,v 1.238 2015/04/27 10:14:44 ozaki-r Exp $ */ +/* $NetBSD: ip_output.c,v 1.239 2015/06/04 09:20:00 ozaki-r Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,12 +91,13 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.238 2015/04/27 10:14:44 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.239 2015/06/04 09:20:00 ozaki-r Exp $"); #include "opt_inet.h" #include "opt_ipsec.h" #include "opt_mrouting.h" #include "opt_net_mpsafe.h" +#include "opt_mpls.h" #include #include @@ -111,6 +112,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.238 2015/04/27 10:14:44 ozaki-r Exp #include #include +#include #include #include @@ -138,6 +140,11 @@ __KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.238 2015/04/27 10:14:44 ozaki-r Exp #include #endif +#ifdef MPLS +#include +#include +#endif + static int ip_pcbopts(struct inpcb *, const struct sockopt *); static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); static struct ifnet *ip_multicast_if(struct in_addr *, int *); @@ -148,6 +155,157 @@ extern pfil_head_t *inet_pfil_hook; /* XXX */ int ip_do_loopback_cksum = 0; +static bool +ip_hresolv_needed(const struct ifnet * const ifp) +{ + switch (ifp->if_type) { + case IFT_ARCNET: + case IFT_ATM: + case IFT_ECONET: + case IFT_ETHER: + case IFT_FDDI: + case IFT_HIPPI: + case IFT_IEEE1394: + return true; + default: + return false; + } +} + +static int +klock_if_output(struct ifnet * const ifp, struct mbuf * const m, + const struct sockaddr * const dst, struct rtentry *rt) +{ 
+ int error; + +#ifndef NET_MPSAFE + KERNEL_LOCK(1, NULL); +#endif + + error = (*ifp->if_output)(ifp, m, dst, rt); + +#ifndef NET_MPSAFE + KERNEL_UNLOCK_ONE(NULL); +#endif + + return error; +} + +/* + * Send an IP packet to a host. + * + * If necessary, resolve the arbitrary IP route, rt0, to an IP host route before + * calling ifp's output routine. + */ +int +ip_hresolv_output(struct ifnet * const ifp0, struct mbuf * const m, + const struct sockaddr * const dst, struct rtentry *rt0) +{ + int error = 0; + struct ifnet *ifp = ifp0; + struct rtentry *rt; + +retry: + if (!ip_hresolv_needed(ifp)) { + rt = rt0; + goto out; + } + + if (rt0 == NULL) { + rt = NULL; + goto out; + } + + rt = rt0; + + /* + * The following block is highly questionable. How did we get here + * with a !RTF_UP route? Does rtalloc1() always return an RTF_UP + * route? + */ + if ((rt->rt_flags & RTF_UP) == 0) { + rt = rtalloc1(dst, 1); + if (rt == NULL) { + error = EHOSTUNREACH; + goto bad; + } + rt0 = rt; + rt->rt_refcnt--; + if (rt->rt_ifp != ifp) { + ifp = rt->rt_ifp; + rt0 = rt; + goto retry; + } + } + + if ((rt->rt_flags & RTF_GATEWAY) == 0) + goto out; + + rt = rt->rt_gwroute; + if (rt == NULL || (rt->rt_flags & RTF_UP) == 0) { + if (rt != NULL) { + rtfree(rt); + rt = rt0; + } + if (rt == NULL) { + error = EHOSTUNREACH; + goto bad; + } + rt = rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1); + if (rt == NULL) { + error = EHOSTUNREACH; + goto bad; + } + /* the "G" test below also prevents rt == rt0 */ + if ((rt->rt_flags & RTF_GATEWAY) != 0 || rt->rt_ifp != ifp) { + rt->rt_refcnt--; + rt0->rt_gwroute = NULL; + error = EHOSTUNREACH; + goto bad; + } + } + if ((rt->rt_flags & RTF_REJECT) != 0) { + if (rt->rt_rmx.rmx_expire == 0 || + time_second < rt->rt_rmx.rmx_expire) { + error = (rt == rt0) ? 
EHOSTDOWN : EHOSTUNREACH; + goto bad; + } + } + +out: +#ifdef MPLS + if (rt0 != NULL && rt_gettag(rt0) != NULL && + rt_gettag(rt0)->sa_family == AF_MPLS && + (m->m_flags & (M_MCAST | M_BCAST)) == 0 && + ifp->if_type == IFT_ETHER) { + union mpls_shim msh; + msh.s_addr = MPLS_GETSADDR(rt0); + if (msh.shim.label != MPLS_LABEL_IMPLNULL) { + struct m_tag *mtag; + /* + * XXX tentative solution to tell ether_output + * it's MPLS. Need some more efficient solution. + */ + mtag = m_tag_get(PACKET_TAG_MPLS, + sizeof(int) /* dummy */, + M_NOWAIT); + if (mtag == NULL) { + error = ENOMEM; + goto bad; + } + m_tag_prepend(m, mtag); + } + } +#endif + + return klock_if_output(ifp, m, dst, rt); +bad: + if (m != NULL) + m_freem(m); + + return error; +} + /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). @@ -560,13 +718,7 @@ sendit: if (__predict_true( (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 || (ifp->if_capenable & IFCAP_TSOv4) != 0)) { -#ifndef NET_MPSAFE - KERNEL_LOCK(1, NULL); -#endif - error = (*ifp->if_output)(ifp, m, sa, rt); -#ifndef NET_MPSAFE - KERNEL_UNLOCK_ONE(NULL); -#endif + error = ip_hresolv_output(ifp, m, sa, rt); } else { error = ip_tso_output(ifp, m, sa, rt); } @@ -634,15 +786,9 @@ sendit: } else { KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0); -#ifndef NET_MPSAFE - KERNEL_LOCK(1, NULL); -#endif - error = (*ifp->if_output)(ifp, m, + error = ip_hresolv_output(ifp, m, (m->m_flags & M_MCAST) ? 
sintocsa(rdst) : sintocsa(dst), rt); -#ifndef NET_MPSAFE - KERNEL_UNLOCK_ONE(NULL); -#endif } } if (error == 0) { diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 00436af1244f..d5ff4f7423fd 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -1,4 +1,4 @@ -/* $NetBSD: ip_var.h,v 1.107 2014/10/11 21:12:51 christos Exp $ */ +/* $NetBSD: ip_var.h,v 1.108 2015/06/04 09:20:00 ozaki-r Exp $ */ /* * Copyright (c) 1982, 1986, 1993 @@ -237,6 +237,9 @@ int rip_usrreq(struct socket *, int ip_setmoptions(struct ip_moptions **, const struct sockopt *sopt); int ip_getmoptions(struct ip_moptions *, struct sockopt *sopt); +int ip_hresolv_output(struct ifnet * const, struct mbuf * const, + const struct sockaddr * const, struct rtentry *); + /* IP Flow interface. */ void ipflow_init(void); void ipflow_poolinit(void); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 344d5ae06355..17c5dec70d0c 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1,4 +1,4 @@ -/* $NetBSD: mbuf.h,v 1.157 2015/03/02 16:16:08 christos Exp $ */ +/* $NetBSD: mbuf.h,v 1.158 2015/06/04 09:19:59 ozaki-r Exp $ */ /*- * Copyright (c) 1996, 1997, 1999, 2001, 2007 The NetBSD Foundation, Inc. @@ -913,6 +913,8 @@ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); * loop detection/recovery */ +#define PACKET_TAG_MPLS 29 /* Indicate it's for MPLS */ + /* * Return the number of bytes in the mbuf chain, m. */