From 3e7ae517e032481984c0726162a014645b623b8b Mon Sep 17 00:00:00 2001 From: itojun Date: Sun, 26 May 2002 16:05:43 +0000 Subject: [PATCH] path MTU discovery blackhole detection. PR 12790 (sorry for not committing it for a long time) --- sys/netinet/tcp_output.c | 15 ++++++++------- sys/netinet/tcp_subr.c | 11 +++++++---- sys/netinet/tcp_timer.c | 20 +++++++++++--------- sys/netinet/tcp_var.h | 6 +++++- usr.bin/netstat/inet.c | 5 +++-- 5 files changed, 34 insertions(+), 23 deletions(-) diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 13ec7b3a9705..0ab01ebc6f34 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -1,4 +1,4 @@ -/* $NetBSD: tcp_output.c,v 1.79 2002/04/27 01:47:58 thorpej Exp $ */ +/* $NetBSD: tcp_output.c,v 1.80 2002/05/26 16:05:43 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -142,7 +142,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.79 2002/04/27 01:47:58 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.80 2002/05/26 16:05:43 itojun Exp $"); #include "opt_inet.h" #include "opt_ipsec.h" @@ -271,12 +271,12 @@ tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep) ifp = rt->rt_ifp; size = tcp_mssdflt; - if (rt->rt_rmx.rmx_mtu != 0) + if (tp->t_mtudisc && rt->rt_rmx.rmx_mtu != 0) size = rt->rt_rmx.rmx_mtu - iphlen - sizeof(struct tcphdr); else if (ifp->if_flags & IFF_LOOPBACK) size = ifp->if_mtu - iphlen - sizeof(struct tcphdr); #ifdef INET - else if (inp && ip_mtudisc) + else if (inp && tp->t_mtudisc) size = ifp->if_mtu - iphlen - sizeof(struct tcphdr); else if (inp && in_localaddr(inp->inp_faddr)) size = ifp->if_mtu - iphlen - sizeof(struct tcphdr); @@ -288,7 +288,7 @@ tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep) /* mapped addr case */ struct in_addr d; bcopy(&in6p->in6p_faddr.s6_addr32[3], &d, sizeof(d)); - if (ip_mtudisc || in_localaddr(d)) + if (tp->t_mtudisc || in_localaddr(d)) size = ifp->if_mtu - iphlen - sizeof(struct tcphdr); } else #endif @@ -297,7 +297,8 @@ tcp_segsize(struct tcpcb *tp, int *txsegsizep, int *rxsegsizep) * for IPv6, path MTU discovery is always turned on, * or the node must use packet size <= 1280. */ - size = ifp->if_mtu - iphlen - sizeof(struct tcphdr); + size = tp->t_mtudisc ? ifp->if_mtu : IPV6_MMTU; + size -= (iphlen + sizeof(struct tcphdr)); } } #endif @@ -1121,7 +1122,7 @@ send: else opts = NULL; error = ip_output(m, opts, ro, - (ip_mtudisc ? IP_MTUDISC : 0) | + (tp->t_mtudisc ? IP_MTUDISC : 0) | (so->so_options & SO_DONTROUTE), 0); break; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index c8e96a142dc7..8cdf3ea7767e 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $NetBSD: tcp_subr.c,v 1.127 2002/05/12 20:33:50 matt Exp $ */ +/* $NetBSD: tcp_subr.c,v 1.128 2002/05/26 16:05:44 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -102,7 +102,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.127 2002/05/12 20:33:50 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.128 2002/05/26 16:05:44 itojun Exp $"); #include "opt_inet.h" #include "opt_ipsec.h" @@ -844,7 +844,7 @@ tcp_respond(tp, template, m, th0, ack, seq, flags) #ifdef INET case AF_INET: error = ip_output(m, NULL, ro, - (ip_mtudisc ? IP_MTUDISC : 0), + (tp && tp->t_mtudisc ? IP_MTUDISC : 0), NULL); break; #endif @@ -915,10 +915,13 @@ tcp_newtcpcb(family, aux) switch (family) { case PF_INET: tp->t_inpcb = (struct inpcb *)aux; + tp->t_mtudisc = ip_mtudisc; break; #ifdef INET6 case PF_INET6: tp->t_in6pcb = (struct in6pcb *)aux; + /* for IPv6, always try to run path MTU discovery */ + tp->t_mtudisc = 1; break; #endif } @@ -1417,7 +1420,7 @@ tcp_ctlinput(cmd, sa, v) notify = tcp_quench; else if (PRC_IS_REDIRECT(cmd)) notify = in_rtchange, ip = 0; - else if (cmd == PRC_MSGSIZE && ip_mtudisc && ip && ip->ip_v == 4) { + else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) { /* * Check to see if we have a valid TCP connection * corresponding to the address in the ICMP message diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 96b638b02bb2..f57e40e8770f 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -1,4 +1,4 @@ -/* $NetBSD: tcp_timer.c,v 1.57 2001/11/13 00:32:42 lukem Exp $ */ +/* $NetBSD: tcp_timer.c,v 1.58 2002/05/26 16:05:45 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -102,7 +102,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.57 2001/11/13 00:32:42 lukem Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tcp_timer.c,v 1.58 2002/05/26 16:05:45 itojun Exp $"); #include "opt_inet.h" #include "opt_tcp_debug.h" @@ -302,7 +302,7 @@ tcp_timer_rexmt(void *arg) TCPT_RANGESET(tp->t_rxtcur, rto * tcp_backoff[tp->t_rxtshift], tp->t_rttmin, TCPTV_REXMTMAX); TCP_TIMER_ARM(tp, TCPT_REXMT, tp->t_rxtcur); -#if 0 + /* * If we are losing and we are trying path MTU discovery, * try turning it off. This will avoid black holes in @@ -311,21 +311,23 @@ tcp_timer_rexmt(void *arg) * lots more sophisticated searching to find the right * value here... */ - if (ip_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { - struct rtentry *rt = NULL; + if (tp->t_mtudisc && tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) { + tcpstat.tcps_pmtublackhole++; #ifdef INET + /* try turning PMTUD off */ if (tp->t_inpcb) - rt = in_pcbrtentry(tp->t_inpcb); + tp->t_mtudisc = 0; #endif #ifdef INET6 + /* try using IPv6 minimum MTU */ if (tp->t_in6pcb) - rt = in6_pcbrtentry(tp->t_in6pcb); + tp->t_mtudisc = 0; #endif - /* XXX: Black hole recovery code goes here */ + /* XXX: more sophisticated Black hole recovery code? */ } -#endif /* 0 */ + /* * If losing, let the lower level know and try for * a better route. Also, if we backed off this far, diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index f584bf9a070b..846984b061e6 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,4 +1,4 @@ -/* $NetBSD: tcp_var.h,v 1.90 2002/05/12 20:33:51 matt Exp $ */ +/* $NetBSD: tcp_var.h,v 1.91 2002/05/26 16:05:45 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -255,6 +255,9 @@ struct tcpcb { /* SACK stuff */ struct ipqehead timeq; /* time sequenced queue (for SACK) */ +/* path MTU discovery blackhole detection */ + int t_mtudisc; /* perform mtudisc for this tcb */ + /* pointer for syn cache entries*/ LIST_HEAD(, syn_cache) t_sc; /* list of entries by this tcb */ }; @@ -494,6 +497,7 @@ struct tcpstat { u_quad_t tcps_persistdrops; /* connections dropped in persist */ u_quad_t tcps_connsdrained; /* connections drained due to memory shortage */ + u_quad_t tcps_pmtublackhole; /* PMTUD blackhole detected */ u_quad_t tcps_sndtotal; /* total packets sent */ u_quad_t tcps_sndpack; /* data packets sent */ diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index d9e1c2c312c2..9327ad702c62 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -1,4 +1,4 @@ -/* $NetBSD: inet.c,v 1.51 2002/02/27 02:33:51 lukem Exp $ */ +/* $NetBSD: inet.c,v 1.52 2002/05/26 16:05:45 itojun Exp $ */ /* * Copyright (c) 1983, 1988, 1993 @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "from: @(#)inet.c 8.4 (Berkeley) 4/20/94"; #else -__RCSID("$NetBSD: inet.c,v 1.51 2002/02/27 02:33:51 lukem Exp $"); +__RCSID("$NetBSD: inet.c,v 1.52 2002/05/26 16:05:45 itojun Exp $"); #endif #endif /* not lint */ @@ -280,6 +280,7 @@ tcp_stats(off, name) ps(tcps_noport, "\t%llu dropped due to no socket\n"); p(tcps_connsdrained, "\t%llu connection%s drained due to memory " "shortage\n"); + p(tcps_pmtublackhole, "\t%llu PMTUD blackhole%s detected\n"); p(tcps_badsyn, "\t%llu bad connection attempt%s\n"); ps(tcps_sc_added, "\t%llu SYN cache entries added\n");