2009-09-16 19:23:04 +04:00
|
|
|
/* $NetBSD: tcp_subr.c,v 1.238 2009/09/16 15:23:05 pooka Exp $ */
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
|
|
|
* All rights reserved.
|
2002-06-09 20:33:36 +04:00
|
|
|
*
|
1999-07-01 12:12:45 +04:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the project nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2002-06-09 20:33:36 +04:00
|
|
|
*
|
1999-07-01 12:12:45 +04:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
1998-02-19 05:36:42 +03:00
|
|
|
|
|
|
|
/*-
|
2008-04-24 15:38:36 +04:00
|
|
|
* Copyright (c) 1997, 1998, 2000, 2001, 2008 The NetBSD Foundation, Inc.
|
1998-02-19 05:36:42 +03:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
|
|
|
|
* Facility, NASA Ames Research Center.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
1994-06-29 10:29:24 +04:00
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
1998-01-05 13:31:44 +03:00
|
|
|
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
|
1994-05-13 10:02:48 +04:00
|
|
|
* The Regents of the University of California. All rights reserved.
|
1993-03-21 12:45:37 +03:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 20:26:28 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1993-03-21 12:45:37 +03:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1998-01-05 13:31:44 +03:00
|
|
|
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
|
1993-03-21 12:45:37 +03:00
|
|
|
*/
|
|
|
|
|
2001-11-13 03:32:34 +03:00
|
|
|
#include <sys/cdefs.h>
|
2009-09-16 19:23:04 +04:00
|
|
|
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.238 2009/09/16 15:23:05 pooka Exp $");
|
2001-11-13 03:32:34 +03:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
#include "opt_inet.h"
|
1999-07-10 02:57:15 +04:00
|
|
|
#include "opt_ipsec.h"
|
1998-01-12 06:00:42 +03:00
|
|
|
#include "opt_tcp_compat_42.h"
|
2001-06-02 20:17:09 +04:00
|
|
|
#include "opt_inet_csum.h"
|
2003-06-23 15:00:59 +04:00
|
|
|
#include "opt_mbuftrace.h"
|
1997-10-13 04:46:08 +04:00
|
|
|
#include "rnd.h"
|
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <sys/param.h>
|
1994-05-13 10:02:48 +04:00
|
|
|
#include <sys/proc.h>
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/protosw.h>
|
|
|
|
#include <sys/errno.h>
|
1997-07-24 01:26:40 +04:00
|
|
|
#include <sys/kernel.h>
|
1998-08-02 04:36:19 +04:00
|
|
|
#include <sys/pool.h>
|
1997-10-13 04:46:08 +04:00
|
|
|
#if NRND > 0
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
#include <sys/md5.h>
|
1997-10-10 05:51:07 +04:00
|
|
|
#include <sys/rnd.h>
|
1997-10-13 04:46:08 +04:00
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <net/route.h>
|
|
|
|
#include <net/if.h>
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_systm.h>
|
|
|
|
#include <netinet/ip.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/ip_var.h>
|
|
|
|
#include <netinet/ip_icmp.h>
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
#ifndef INET
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#endif
|
|
|
|
#include <netinet/ip6.h>
|
|
|
|
#include <netinet6/in6_pcb.h>
|
|
|
|
#include <netinet6/ip6_var.h>
|
1999-07-22 16:56:56 +04:00
|
|
|
#include <netinet6/in6_var.h>
|
1999-12-08 19:22:20 +03:00
|
|
|
#include <netinet6/ip6protosw.h>
|
2000-10-19 01:14:12 +04:00
|
|
|
#include <netinet/icmp6.h>
|
2002-05-29 11:53:39 +04:00
|
|
|
#include <netinet6/nd6.h>
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif
|
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <netinet/tcp_fsm.h>
|
|
|
|
#include <netinet/tcp_seq.h>
|
|
|
|
#include <netinet/tcp_timer.h>
|
|
|
|
#include <netinet/tcp_var.h>
|
2008-04-12 09:58:22 +04:00
|
|
|
#include <netinet/tcp_private.h>
|
2006-10-09 20:27:07 +04:00
|
|
|
#include <netinet/tcp_congctl.h>
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/tcpip.h>
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef IPSEC
|
|
|
|
#include <netinet6/ipsec.h>
|
2004-04-26 07:54:28 +04:00
|
|
|
#include <netkey/key.h>
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif /*IPSEC*/
|
|
|
|
|
2003-08-15 07:42:00 +04:00
|
|
|
#ifdef FAST_IPSEC
|
|
|
|
#include <netipsec/ipsec.h>
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c,
and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
#include <netipsec/xform.h>
|
2003-08-15 07:42:00 +04:00
|
|
|
#ifdef INET6
|
|
|
|
#include <netipsec/ipsec6.h>
|
|
|
|
#endif
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c,
and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
#include <netipsec/key.h>
|
2003-08-15 07:42:00 +04:00
|
|
|
#endif /* FAST_IPSEC*/
|
|
|
|
|
|
|
|
|
2002-05-13 00:33:50 +04:00
|
|
|
struct inpcbtable tcbtable; /* head of queue of active tcpcb's */
|
|
|
|
u_int32_t tcp_now; /* for RFC 1323 timestamps */
|
|
|
|
|
2008-04-12 09:58:22 +04:00
|
|
|
percpu_t *tcpstat_percpu;
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/* patchable/settable parameters for tcp */
|
|
|
|
int tcp_mssdflt = TCP_MSS;
|
2007-08-02 06:42:40 +04:00
|
|
|
int tcp_minmss = TCP_MINMSS;
|
1993-03-21 12:45:37 +03:00
|
|
|
int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
|
1998-04-30 00:43:29 +04:00
|
|
|
int tcp_do_rfc1323 = 1; /* window scaling / timestamps (obsolete) */
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
#if NRND > 0
int	tcp_do_rfc1948 = 0;	/* ISS by cryptographic hash */
#endif
int	tcp_do_sack = 1;	/* selective acknowledgement */
int	tcp_do_win_scale = 1;	/* RFC1323 window scaling */
int	tcp_do_timestamps = 1;	/* RFC1323 timestamps */
int	tcp_ack_on_push = 0;	/* set to enable immediate ACK-on-PUSH */
int	tcp_do_ecn = 0;		/* Explicit Congestion Notification */

/*
 * Initial slow start windows.  Both defaults can be overridden at
 * build time by pre-defining the corresponding macro.
 */
#ifndef TCP_INIT_WIN
#define	TCP_INIT_WIN	0	/* initial slow start window */
#endif
#ifndef TCP_INIT_WIN_LOCAL
#define	TCP_INIT_WIN_LOCAL 4	/* initial slow start window for local nets */
#endif
int	tcp_init_win = TCP_INIT_WIN;
int	tcp_init_win_local = TCP_INIT_WIN_LOCAL;

int	tcp_mss_ifmtu = 0;

/* Default follows the TCP_COMPAT_42 kernel option. */
#ifdef TCP_COMPAT_42
int	tcp_compat_42 = 1;
#else
int	tcp_compat_42 = 0;
#endif

int	tcp_rst_ppslim = 100;	/* 100pps */
int	tcp_ackdrop_ppslim = 100;	/* 100pps */
int	tcp_do_loopback_cksum = 0;
int	tcp_do_abc = 1;		/* RFC3465 Appropriate byte counting. */
int	tcp_abc_aggressive = 1;	/* 1: L=2*SMSS  0: L=1*SMSS */

int	tcp_sack_tp_maxholes = 32;
int	tcp_sack_globalmaxholes = 1024;
int	tcp_sack_globalholes = 0;
int	tcp_ecn_maxretries = 1;
|
2005-04-05 05:07:17 +04:00
|
|
|
|
2000-10-18 11:21:10 +04:00
|
|
|
/*
 * tcb hash.  TCBHASHSIZE may be pre-defined at build time; tcp_init()
 * passes tcbhashsize to in_pcbinit() for both of its size arguments.
 */
#ifndef TCBHASHSIZE
#define	TCBHASHSIZE	128
#endif
int	tcbhashsize = TCBHASHSIZE;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2000-10-18 11:21:10 +04:00
|
|
|
/* syn hash parameters */
|
|
|
|
#define TCP_SYN_HASH_SIZE 293
|
|
|
|
#define TCP_SYN_BUCKET_SIZE 35
|
|
|
|
int tcp_syn_cache_size = TCP_SYN_HASH_SIZE;
|
|
|
|
int tcp_syn_cache_limit = TCP_SYN_HASH_SIZE*TCP_SYN_BUCKET_SIZE;
|
|
|
|
int tcp_syn_bucket_limit = 3*TCP_SYN_BUCKET_SIZE;
|
|
|
|
struct syn_cache_head tcp_syn_cache[TCP_SYN_HASH_SIZE];
|
|
|
|
|
2005-02-03 00:41:55 +03:00
|
|
|
int tcp_freeq(struct tcpcb *);
|
1997-12-10 04:58:07 +03:00
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
#ifdef INET
|
2005-02-03 00:41:55 +03:00
|
|
|
void tcp_mtudisc_callback(struct in_addr);
|
2000-10-19 01:14:12 +04:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
2005-02-03 00:41:55 +03:00
|
|
|
void tcp6_mtudisc_callback(struct in6_addr *);
|
2000-10-19 01:14:12 +04:00
|
|
|
#endif
|
2000-10-18 21:09:14 +04:00
|
|
|
|
2000-10-20 00:22:59 +04:00
|
|
|
#ifdef INET6
|
2005-02-03 00:41:55 +03:00
|
|
|
void tcp6_mtudisc(struct in6pcb *, int);
|
2000-10-18 21:09:14 +04:00
|
|
|
#endif
|
|
|
|
|
2008-10-13 23:44:21 +04:00
|
|
|
static struct pool tcpcb_pool;
|
1998-08-02 04:36:19 +04:00
|
|
|
|
2001-06-02 20:17:09 +04:00
|
|
|
/*
 * Checksum event counters, compiled in only with TCP_CSUM_COUNTERS.
 * The IPv4 set is grouped under "tcp" and the IPv6 set under "tcp6";
 * every counter is attached statically.
 */
#ifdef TCP_CSUM_COUNTERS
#include <sys/device.h>

#if defined(INET)
struct evcnt tcp_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "hwcsum bad");
struct evcnt tcp_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "hwcsum ok");
struct evcnt tcp_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "hwcsum data");
struct evcnt tcp_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "swcsum");

EVCNT_ATTACH_STATIC(tcp_hwcsum_bad);
EVCNT_ATTACH_STATIC(tcp_hwcsum_ok);
EVCNT_ATTACH_STATIC(tcp_hwcsum_data);
EVCNT_ATTACH_STATIC(tcp_swcsum);
#endif /* defined(INET) */

#if defined(INET6)
struct evcnt tcp6_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp6", "hwcsum bad");
struct evcnt tcp6_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp6", "hwcsum ok");
struct evcnt tcp6_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp6", "hwcsum data");
struct evcnt tcp6_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp6", "swcsum");

EVCNT_ATTACH_STATIC(tcp6_hwcsum_bad);
EVCNT_ATTACH_STATIC(tcp6_hwcsum_ok);
EVCNT_ATTACH_STATIC(tcp6_hwcsum_data);
EVCNT_ATTACH_STATIC(tcp6_swcsum);
#endif /* defined(INET6) */
#endif /* TCP_CSUM_COUNTERS */
|
|
|
|
|
2004-05-01 06:20:42 +04:00
|
|
|
|
2002-04-27 05:47:58 +04:00
|
|
|
/*
 * Output-path event counters, compiled in only with TCP_OUTPUT_COUNTERS.
 * All are grouped under "tcp" and attached statically.
 */
#ifdef TCP_OUTPUT_COUNTERS
#include <sys/device.h>

struct evcnt tcp_output_bigheader = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "output big header");
struct evcnt tcp_output_predict_hit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "output predict hit");
struct evcnt tcp_output_predict_miss = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "output predict miss");
struct evcnt tcp_output_copysmall = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "output copy small");
struct evcnt tcp_output_copybig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "output copy big");
struct evcnt tcp_output_refbig = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp", "output reference big");

EVCNT_ATTACH_STATIC(tcp_output_bigheader);
EVCNT_ATTACH_STATIC(tcp_output_predict_hit);
EVCNT_ATTACH_STATIC(tcp_output_predict_miss);
EVCNT_ATTACH_STATIC(tcp_output_copysmall);
EVCNT_ATTACH_STATIC(tcp_output_copybig);
EVCNT_ATTACH_STATIC(tcp_output_refbig);

#endif /* TCP_OUTPUT_COUNTERS */
|
|
|
|
|
2002-05-07 06:59:38 +04:00
|
|
|
/*
 * Reassembly event counters, compiled in only with TCP_REASS_COUNTERS.
 * tcp_reass_ ("calls") is initialized with a NULL second argument; every
 * other counter passes &tcp_reass_ there (presumably the parent counter --
 * confirm against evcnt(9)).
 */
#ifdef TCP_REASS_COUNTERS
#include <sys/device.h>

struct evcnt tcp_reass_ = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "tcp_reass", "calls");
struct evcnt tcp_reass_empty = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "insert into empty queue");
/*
 * Note the label order: element 0 is labelled ">7 iterations", while
 * elements 1-7 are labelled with their own index.
 */
struct evcnt tcp_reass_iteration[8] = {
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", ">7 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "1 iteration"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "2 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "3 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "4 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "5 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "6 iterations"),
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, &tcp_reass_, "tcp_reass", "7 iterations"),
};
struct evcnt tcp_reass_prependfirst = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "prepend to first");
struct evcnt tcp_reass_prepend = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "prepend");
struct evcnt tcp_reass_insert = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "insert");
struct evcnt tcp_reass_inserttail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "insert at tail");
struct evcnt tcp_reass_append = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "append");
struct evcnt tcp_reass_appendtail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "append to tail fragment");
struct evcnt tcp_reass_overlaptail = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "overlap at end");
struct evcnt tcp_reass_overlapfront = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "overlap at start");
struct evcnt tcp_reass_segdup = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "duplicate segment");
struct evcnt tcp_reass_fragdup = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    &tcp_reass_, "tcp_reass", "duplicate fragment");

EVCNT_ATTACH_STATIC(tcp_reass_);
EVCNT_ATTACH_STATIC(tcp_reass_empty);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 0);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 1);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 2);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 3);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 4);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 5);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 6);
EVCNT_ATTACH_STATIC2(tcp_reass_iteration, 7);
EVCNT_ATTACH_STATIC(tcp_reass_prependfirst);
EVCNT_ATTACH_STATIC(tcp_reass_prepend);
EVCNT_ATTACH_STATIC(tcp_reass_insert);
EVCNT_ATTACH_STATIC(tcp_reass_inserttail);
EVCNT_ATTACH_STATIC(tcp_reass_append);
EVCNT_ATTACH_STATIC(tcp_reass_appendtail);
EVCNT_ATTACH_STATIC(tcp_reass_overlaptail);
EVCNT_ATTACH_STATIC(tcp_reass_overlapfront);
EVCNT_ATTACH_STATIC(tcp_reass_segdup);
EVCNT_ATTACH_STATIC(tcp_reass_fragdup);

#endif /* TCP_REASS_COUNTERS */
|
|
|
|
|
2003-02-26 09:31:08 +03:00
|
|
|
/*
 * mbuf owners, compiled in only with MBUFTRACE.  tcp_init() passes each
 * of them to MOWNER_ATTACH().
 */
#ifdef MBUFTRACE
struct mowner tcp_mowner = MOWNER_INIT("tcp", "");
struct mowner tcp_rx_mowner = MOWNER_INIT("tcp", "rx");
struct mowner tcp_tx_mowner = MOWNER_INIT("tcp", "tx");
struct mowner tcp_sock_mowner = MOWNER_INIT("tcp", "sock");
struct mowner tcp_sock_rx_mowner = MOWNER_INIT("tcp", "sock rx");
struct mowner tcp_sock_tx_mowner = MOWNER_INIT("tcp", "sock tx");
#endif
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
|
|
|
* Tcp initialization
|
|
|
|
*/
|
1994-01-09 02:07:16 +03:00
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_init(void)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
int hlen;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1996-09-15 22:11:06 +04:00
|
|
|
in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize);
|
2008-10-13 23:44:21 +04:00
|
|
|
pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcbpl",
|
|
|
|
NULL, IPL_SOFTNET);
|
2001-09-10 08:24:24 +04:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
hlen = sizeof(struct ip) + sizeof(struct tcphdr);
|
|
|
|
#ifdef INET6
|
|
|
|
if (sizeof(struct ip) < sizeof(struct ip6_hdr))
|
|
|
|
hlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
|
|
|
|
#endif
|
|
|
|
if (max_protohdr < hlen)
|
|
|
|
max_protohdr = hlen;
|
|
|
|
if (max_linkhdr + hlen > MHLEN)
|
1993-03-21 12:45:37 +03:00
|
|
|
panic("tcp_init");
|
2000-10-18 21:09:14 +04:00
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
#ifdef INET
|
2000-10-18 21:09:14 +04:00
|
|
|
icmp_mtudisc_callback_register(tcp_mtudisc_callback);
|
2000-10-19 01:14:12 +04:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
icmp6_mtudisc_callback_register(tcp6_mtudisc_callback);
|
|
|
|
#endif
|
2000-10-18 21:09:14 +04:00
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
tcp_usrreq_init();
|
|
|
|
|
2001-09-11 00:36:43 +04:00
|
|
|
/* Initialize timer state. */
|
|
|
|
tcp_timer_init();
|
|
|
|
|
1998-05-07 05:37:27 +04:00
|
|
|
/* Initialize the compressed state engine. */
|
|
|
|
syn_cache_init();
|
2001-06-02 20:17:09 +04:00
|
|
|
|
2006-10-09 20:27:07 +04:00
|
|
|
/* Initialize the congestion control algorithms. */
|
|
|
|
tcp_congctl_init();
|
|
|
|
|
2007-06-20 19:29:17 +04:00
|
|
|
/* Initialize the TCPCB template. */
|
|
|
|
tcp_tcpcb_template();
|
|
|
|
|
2009-01-29 23:38:22 +03:00
|
|
|
/* Initialize reassembly queue */
|
|
|
|
tcpipqent_init();
|
|
|
|
|
2009-05-27 21:41:03 +04:00
|
|
|
/* SACK */
|
|
|
|
tcp_sack_init();
|
|
|
|
|
2003-02-26 09:31:08 +03:00
|
|
|
MOWNER_ATTACH(&tcp_tx_mowner);
|
|
|
|
MOWNER_ATTACH(&tcp_rx_mowner);
|
2006-12-06 12:10:45 +03:00
|
|
|
MOWNER_ATTACH(&tcp_reass_mowner);
|
|
|
|
MOWNER_ATTACH(&tcp_sock_mowner);
|
|
|
|
MOWNER_ATTACH(&tcp_sock_tx_mowner);
|
|
|
|
MOWNER_ATTACH(&tcp_sock_rx_mowner);
|
2003-02-26 09:31:08 +03:00
|
|
|
MOWNER_ATTACH(&tcp_mowner);
|
2008-04-26 12:13:35 +04:00
|
|
|
|
|
|
|
tcpstat_percpu = percpu_alloc(sizeof(uint64_t) * TCP_NSTATS);
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create template to be used to send tcp packets on a connection.
|
|
|
|
* Call after host entry created, allocates an mbuf and fills
|
|
|
|
* in a skeletal tcp/ip header, minimizing the amount of work
|
|
|
|
* necessary when the connection is used.
|
|
|
|
*/
|
1999-07-01 12:12:45 +04:00
|
|
|
struct mbuf *
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_template(struct tcpcb *tp)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2000-03-30 16:51:13 +04:00
|
|
|
struct inpcb *inp = tp->t_inpcb;
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-03-30 16:51:13 +04:00
|
|
|
struct in6pcb *in6p = tp->t_in6pcb;
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif
|
2000-03-30 16:51:13 +04:00
|
|
|
struct tcphdr *n;
|
|
|
|
struct mbuf *m;
|
1999-07-01 12:12:45 +04:00
|
|
|
int hlen;
|
|
|
|
|
|
|
|
switch (tp->t_family) {
|
|
|
|
case AF_INET:
|
|
|
|
hlen = sizeof(struct ip);
|
|
|
|
if (inp)
|
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
if (in6p) {
|
|
|
|
/* mapped addr case */
|
|
|
|
if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)
|
|
|
|
&& IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return NULL; /*EINVAL*/
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
hlen = sizeof(struct ip6_hdr);
|
|
|
|
if (in6p) {
|
|
|
|
/* more sanity check? */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return NULL; /*EINVAL*/
|
|
|
|
#endif
|
|
|
|
default:
|
1999-07-02 16:45:32 +04:00
|
|
|
hlen = 0; /*pacify gcc*/
|
1999-07-01 12:12:45 +04:00
|
|
|
return NULL; /*EAFNOSUPPORT*/
|
|
|
|
}
|
2000-09-19 22:21:41 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (hlen + sizeof(struct tcphdr) > MCLBYTES)
|
|
|
|
panic("mclbytes too small for t_template");
|
|
|
|
#endif
|
|
|
|
m = tp->t_template;
|
|
|
|
if (m && m->m_len == hlen + sizeof(struct tcphdr))
|
|
|
|
;
|
|
|
|
else {
|
|
|
|
if (m)
|
|
|
|
m_freem(m);
|
|
|
|
m = tp->t_template = NULL;
|
1999-07-01 12:12:45 +04:00
|
|
|
MGETHDR(m, M_DONTWAIT, MT_HEADER);
|
2000-09-19 22:21:41 +04:00
|
|
|
if (m && hlen + sizeof(struct tcphdr) > MHLEN) {
|
1999-07-01 12:12:45 +04:00
|
|
|
MCLGET(m, M_DONTWAIT);
|
|
|
|
if ((m->m_flags & M_EXT) == 0) {
|
|
|
|
m_free(m);
|
|
|
|
m = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (m == NULL)
|
|
|
|
return NULL;
|
2003-02-26 09:31:08 +03:00
|
|
|
MCLAIM(m, &tcp_mowner);
|
1999-08-27 06:56:14 +04:00
|
|
|
m->m_pkthdr.len = m->m_len = hlen + sizeof(struct tcphdr);
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
2001-06-02 20:17:09 +04:00
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(mtod(m, void *), 0, m->m_len);
|
2001-06-02 20:17:09 +04:00
|
|
|
|
2007-03-04 08:59:00 +03:00
|
|
|
n = (struct tcphdr *)(mtod(m, char *) + hlen);
|
2001-06-02 20:17:09 +04:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
switch (tp->t_family) {
|
|
|
|
case AF_INET:
|
|
|
|
{
|
|
|
|
struct ipovly *ipov;
|
|
|
|
mtod(m, struct ip *)->ip_v = 4;
|
2003-09-08 06:06:34 +04:00
|
|
|
mtod(m, struct ip *)->ip_hl = hlen >> 2;
|
1999-07-01 12:12:45 +04:00
|
|
|
ipov = mtod(m, struct ipovly *);
|
|
|
|
ipov->ih_pr = IPPROTO_TCP;
|
|
|
|
ipov->ih_len = htons(sizeof(struct tcphdr));
|
|
|
|
if (inp) {
|
|
|
|
ipov->ih_src = inp->inp_laddr;
|
|
|
|
ipov->ih_dst = inp->inp_faddr;
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p) {
|
|
|
|
/* mapped addr case */
|
|
|
|
bcopy(&in6p->in6p_laddr.s6_addr32[3], &ipov->ih_src,
|
|
|
|
sizeof(ipov->ih_src));
|
|
|
|
bcopy(&in6p->in6p_faddr.s6_addr32[3], &ipov->ih_dst,
|
|
|
|
sizeof(ipov->ih_dst));
|
|
|
|
}
|
|
|
|
#endif
|
2001-06-02 20:17:09 +04:00
|
|
|
/*
|
|
|
|
* Compute the pseudo-header portion of the checksum
|
|
|
|
* now. We incrementally add in the TCP option and
|
|
|
|
* payload lengths later, and then compute the TCP
|
|
|
|
* checksum right before the packet is sent off onto
|
|
|
|
* the wire.
|
|
|
|
*/
|
|
|
|
n->th_sum = in_cksum_phdr(ipov->ih_src.s_addr,
|
|
|
|
ipov->ih_dst.s_addr,
|
|
|
|
htons(sizeof(struct tcphdr) + IPPROTO_TCP));
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
{
|
|
|
|
struct ip6_hdr *ip6;
|
|
|
|
mtod(m, struct ip *)->ip_v = 6;
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
ip6->ip6_nxt = IPPROTO_TCP;
|
|
|
|
ip6->ip6_plen = htons(sizeof(struct tcphdr));
|
|
|
|
ip6->ip6_src = in6p->in6p_laddr;
|
|
|
|
ip6->ip6_dst = in6p->in6p_faddr;
|
|
|
|
ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK;
|
|
|
|
if (ip6_auto_flowlabel) {
|
|
|
|
ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
|
2002-06-09 20:33:36 +04:00
|
|
|
ip6->ip6_flow |=
|
2003-09-06 07:36:30 +04:00
|
|
|
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
1999-12-15 09:28:43 +03:00
|
|
|
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
|
|
|
|
ip6->ip6_vfc |= IPV6_VERSION;
|
2001-06-02 20:17:09 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute the pseudo-header portion of the checksum
|
|
|
|
* now. We incrementally add in the TCP option and
|
|
|
|
* payload lengths later, and then compute the TCP
|
|
|
|
* checksum right before the packet is sent off onto
|
|
|
|
* the wire.
|
|
|
|
*/
|
|
|
|
n->th_sum = in6_cksum_phdr(&in6p->in6p_laddr,
|
|
|
|
&in6p->in6p_faddr, htonl(sizeof(struct tcphdr)),
|
|
|
|
htonl(IPPROTO_TCP));
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
if (inp) {
|
|
|
|
n->th_sport = inp->inp_lport;
|
|
|
|
n->th_dport = inp->inp_fport;
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p) {
|
|
|
|
n->th_sport = in6p->in6p_lport;
|
|
|
|
n->th_dport = in6p->in6p_fport;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
n->th_seq = 0;
|
|
|
|
n->th_ack = 0;
|
|
|
|
n->th_x2 = 0;
|
|
|
|
n->th_off = 5;
|
|
|
|
n->th_flags = 0;
|
|
|
|
n->th_win = 0;
|
|
|
|
n->th_urp = 0;
|
|
|
|
return (m);
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Send a single message to the TCP at address specified by
|
|
|
|
* the given TCP/IP header. If m == 0, then we make a copy
|
|
|
|
* of the tcpiphdr at ti and send directly to the addressed host.
|
|
|
|
* This is used to force keep alive messages out using the TCP
|
|
|
|
* template for a connection tp->t_template. If flags are given
|
|
|
|
* then we send a message back to the TCP which originated the
|
|
|
|
* segment ti, and discard the mbuf containing it and any other
|
|
|
|
* attached mbufs.
|
|
|
|
*
|
|
|
|
* In any case the ack and sequence number of the transmitted
|
|
|
|
* segment are as specified by the parameters.
|
|
|
|
*/
|
1997-07-24 01:26:40 +04:00
|
|
|
int
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_respond(struct tcpcb *tp, struct mbuf *template, struct mbuf *m,
|
|
|
|
struct tcphdr *th0, tcp_seq ack, tcp_seq seq, int flags)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2007-12-20 23:24:49 +03:00
|
|
|
#ifdef INET6
|
2007-12-20 22:53:29 +03:00
|
|
|
struct rtentry *rt;
|
2007-12-20 23:24:49 +03:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
struct route *ro;
|
1999-01-20 06:39:54 +03:00
|
|
|
int error, tlen, win = 0;
|
1999-07-01 12:12:45 +04:00
|
|
|
int hlen;
|
|
|
|
struct ip *ip;
|
|
|
|
#ifdef INET6
|
|
|
|
struct ip6_hdr *ip6;
|
|
|
|
#endif
|
|
|
|
int family; /* family on packet, not inpcb/in6pcb! */
|
|
|
|
struct tcphdr *th;
|
2003-08-23 01:53:01 +04:00
|
|
|
struct socket *so;
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
if (tp != NULL && (flags & TH_RST) == 0) {
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (tp->t_inpcb && tp->t_in6pcb)
|
|
|
|
panic("tcp_respond: both t_inpcb and t_in6pcb are set");
|
|
|
|
#endif
|
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp->t_inpcb)
|
|
|
|
win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-10-17 07:06:42 +04:00
|
|
|
if (tp->t_in6pcb)
|
1999-07-01 12:12:45 +04:00
|
|
|
win = sbspace(&tp->t_in6pcb->in6p_socket->so_rcv);
|
|
|
|
#endif
|
|
|
|
}
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2002-11-24 13:52:47 +03:00
|
|
|
th = NULL; /* Quell uninitialized warning */
|
1999-07-01 12:12:45 +04:00
|
|
|
ip = NULL;
|
|
|
|
#ifdef INET6
|
|
|
|
ip6 = NULL;
|
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
if (m == 0) {
|
1999-07-22 16:56:56 +04:00
|
|
|
if (!template)
|
|
|
|
return EINVAL;
|
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
/* get family information from template */
|
|
|
|
switch (mtod(template, struct ip *)->ip_v) {
|
|
|
|
case 4:
|
|
|
|
family = AF_INET;
|
|
|
|
hlen = sizeof(struct ip);
|
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case 6:
|
|
|
|
family = AF_INET6;
|
|
|
|
hlen = sizeof(struct ip6_hdr);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
}
|
|
|
|
|
|
|
|
MGETHDR(m, M_DONTWAIT, MT_HEADER);
|
|
|
|
if (m) {
|
2003-02-26 09:31:08 +03:00
|
|
|
MCLAIM(m, &tcp_tx_mowner);
|
1999-07-01 12:12:45 +04:00
|
|
|
MCLGET(m, M_DONTWAIT);
|
1999-07-22 16:56:56 +04:00
|
|
|
if ((m->m_flags & M_EXT) == 0) {
|
1999-07-01 12:12:45 +04:00
|
|
|
m_free(m);
|
|
|
|
m = NULL;
|
|
|
|
}
|
|
|
|
}
|
1993-03-21 12:45:37 +03:00
|
|
|
if (m == NULL)
|
1997-07-24 01:26:40 +04:00
|
|
|
return (ENOBUFS);
|
1998-04-29 09:16:46 +04:00
|
|
|
|
|
|
|
if (tcp_compat_42)
|
|
|
|
tlen = 1;
|
|
|
|
else
|
|
|
|
tlen = 0;
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
m->m_data += max_linkhdr;
|
2007-03-04 08:59:00 +03:00
|
|
|
bcopy(mtod(template, void *), mtod(m, void *),
|
1999-07-01 12:12:45 +04:00
|
|
|
template->m_len);
|
|
|
|
switch (family) {
|
|
|
|
case AF_INET:
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
th = (struct tcphdr *)(ip + 1);
|
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
th = (struct tcphdr *)(ip6 + 1);
|
|
|
|
break;
|
|
|
|
#endif
|
1999-12-13 18:17:17 +03:00
|
|
|
#if 0
|
|
|
|
default:
|
|
|
|
/* noone will visit here */
|
|
|
|
m_freem(m);
|
|
|
|
return EAFNOSUPPORT;
|
1999-07-03 01:02:05 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
1993-03-21 12:45:37 +03:00
|
|
|
flags = TH_ACK;
|
|
|
|
} else {
|
2000-06-30 20:44:33 +04:00
|
|
|
|
|
|
|
if ((m->m_flags & M_PKTHDR) == 0) {
|
|
|
|
#if 0
|
|
|
|
printf("non PKTHDR to tcp_respond\n");
|
|
|
|
#endif
|
|
|
|
m_freem(m);
|
|
|
|
return EINVAL;
|
|
|
|
}
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (!th0)
|
|
|
|
panic("th0 == NULL in tcp_respond");
|
|
|
|
#endif
|
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
/* get family information from m */
|
|
|
|
switch (mtod(m, struct ip *)->ip_v) {
|
|
|
|
case 4:
|
|
|
|
family = AF_INET;
|
|
|
|
hlen = sizeof(struct ip);
|
2000-06-30 20:44:33 +04:00
|
|
|
ip = mtod(m, struct ip *);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case 6:
|
|
|
|
family = AF_INET6;
|
|
|
|
hlen = sizeof(struct ip6_hdr);
|
2000-06-30 20:44:33 +04:00
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
1999-12-13 18:17:17 +03:00
|
|
|
m_freem(m);
|
1999-07-01 12:12:45 +04:00
|
|
|
return EAFNOSUPPORT;
|
|
|
|
}
|
2005-01-03 22:47:30 +03:00
|
|
|
/* clear h/w csum flags inherited from rx packet */
|
|
|
|
m->m_pkthdr.csum_flags = 0;
|
|
|
|
|
2000-06-30 20:44:33 +04:00
|
|
|
if ((flags & TH_SYN) == 0 || sizeof(*th0) > (th0->th_off << 2))
|
|
|
|
tlen = sizeof(*th0);
|
|
|
|
else
|
|
|
|
tlen = th0->th_off << 2;
|
|
|
|
|
|
|
|
if (m->m_len > hlen + tlen && (m->m_flags & M_EXT) == 0 &&
|
2007-03-04 08:59:00 +03:00
|
|
|
mtod(m, char *) + hlen == (char *)th0) {
|
2000-06-30 20:44:33 +04:00
|
|
|
m->m_len = hlen + tlen;
|
|
|
|
m_freem(m->m_next);
|
|
|
|
m->m_next = NULL;
|
|
|
|
} else {
|
|
|
|
struct mbuf *n;
|
|
|
|
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (max_linkhdr + hlen + tlen > MCLBYTES) {
|
|
|
|
m_freem(m);
|
|
|
|
return EMSGSIZE;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
MGETHDR(n, M_DONTWAIT, MT_HEADER);
|
|
|
|
if (n && max_linkhdr + hlen + tlen > MHLEN) {
|
|
|
|
MCLGET(n, M_DONTWAIT);
|
|
|
|
if ((n->m_flags & M_EXT) == 0) {
|
|
|
|
m_freem(n);
|
|
|
|
n = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!n) {
|
|
|
|
m_freem(m);
|
|
|
|
return ENOBUFS;
|
|
|
|
}
|
|
|
|
|
2003-02-26 09:31:08 +03:00
|
|
|
MCLAIM(n, &tcp_tx_mowner);
|
2000-06-30 20:44:33 +04:00
|
|
|
n->m_data += max_linkhdr;
|
|
|
|
n->m_len = hlen + tlen;
|
2007-03-04 08:59:00 +03:00
|
|
|
m_copyback(n, 0, hlen, mtod(m, void *));
|
|
|
|
m_copyback(n, hlen, tlen, (void *)th0);
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
m_freem(m);
|
2000-06-30 20:44:33 +04:00
|
|
|
m = n;
|
|
|
|
n = NULL;
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
|
|
|
|
1994-05-13 10:02:48 +04:00
|
|
|
#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
|
1999-07-01 12:12:45 +04:00
|
|
|
switch (family) {
|
|
|
|
case AF_INET:
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
th = (struct tcphdr *)(ip + 1);
|
2000-06-30 20:44:33 +04:00
|
|
|
ip->ip_p = IPPROTO_TCP;
|
1999-07-01 12:12:45 +04:00
|
|
|
xchg(ip->ip_dst, ip->ip_src, struct in_addr);
|
1999-07-15 02:37:13 +04:00
|
|
|
ip->ip_p = IPPROTO_TCP;
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
ip6 = mtod(m, struct ip6_hdr *);
|
|
|
|
th = (struct tcphdr *)(ip6 + 1);
|
2000-06-30 20:44:33 +04:00
|
|
|
ip6->ip6_nxt = IPPROTO_TCP;
|
1999-07-01 12:12:45 +04:00
|
|
|
xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
|
1999-07-15 02:37:13 +04:00
|
|
|
ip6->ip6_nxt = IPPROTO_TCP;
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
2000-06-30 20:44:33 +04:00
|
|
|
#endif
|
|
|
|
#if 0
|
|
|
|
default:
|
|
|
|
/* noone will visit here */
|
|
|
|
m_freem(m);
|
|
|
|
return EAFNOSUPPORT;
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
xchg(th->th_dport, th->th_sport, u_int16_t);
|
1993-03-21 12:45:37 +03:00
|
|
|
#undef xchg
|
2000-06-30 20:44:33 +04:00
|
|
|
tlen = 0; /*be friendly with the following code*/
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
1999-07-01 12:12:45 +04:00
|
|
|
th->th_seq = htonl(seq);
|
|
|
|
th->th_ack = htonl(ack);
|
|
|
|
th->th_x2 = 0;
|
1997-07-24 01:26:40 +04:00
|
|
|
if ((flags & TH_SYN) == 0) {
|
|
|
|
if (tp)
|
2000-02-29 08:25:49 +03:00
|
|
|
win >>= tp->rcv_scale;
|
|
|
|
if (win > TCP_MAXWIN)
|
|
|
|
win = TCP_MAXWIN;
|
|
|
|
th->th_win = htons((u_int16_t)win);
|
1999-07-01 12:12:45 +04:00
|
|
|
th->th_off = sizeof (struct tcphdr) >> 2;
|
2000-06-30 20:44:33 +04:00
|
|
|
tlen += sizeof(*th);
|
1997-07-24 01:26:40 +04:00
|
|
|
} else
|
1999-07-01 12:12:45 +04:00
|
|
|
tlen += th->th_off << 2;
|
|
|
|
m->m_len = hlen + tlen;
|
|
|
|
m->m_pkthdr.len = hlen + tlen;
|
1997-07-24 01:26:40 +04:00
|
|
|
m->m_pkthdr.rcvif = (struct ifnet *) 0;
|
1999-07-01 12:12:45 +04:00
|
|
|
th->th_flags = flags;
|
|
|
|
th->th_urp = 0;
|
|
|
|
|
|
|
|
switch (family) {
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
case AF_INET:
|
|
|
|
{
|
|
|
|
struct ipovly *ipov = (struct ipovly *)ip;
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(ipov->ih_x1, 0, sizeof ipov->ih_x1);
|
1999-07-01 12:12:45 +04:00
|
|
|
ipov->ih_len = htons((u_int16_t)tlen);
|
|
|
|
|
|
|
|
th->th_sum = 0;
|
|
|
|
th->th_sum = in_cksum(m, hlen + tlen);
|
2002-08-14 04:23:27 +04:00
|
|
|
ip->ip_len = htons(hlen + tlen);
|
1999-07-01 12:12:45 +04:00
|
|
|
ip->ip_ttl = ip_defttl;
|
|
|
|
break;
|
|
|
|
}
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
{
|
|
|
|
th->th_sum = 0;
|
|
|
|
th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
|
|
|
|
tlen);
|
2005-02-12 04:24:07 +03:00
|
|
|
ip6->ip6_plen = htons(tlen);
|
1999-12-13 18:17:17 +03:00
|
|
|
if (tp && tp->t_in6pcb) {
|
|
|
|
struct ifnet *oifp;
|
2008-01-14 07:19:09 +03:00
|
|
|
ro = &tp->t_in6pcb->in6p_route;
|
|
|
|
oifp = (rt = rtcache_validate(ro)) != NULL ? rt->rt_ifp
|
|
|
|
: NULL;
|
1999-12-13 18:17:17 +03:00
|
|
|
ip6->ip6_hlim = in6_selecthlim(tp->t_in6pcb, oifp);
|
|
|
|
} else
|
|
|
|
ip6->ip6_hlim = ip6_defhlim;
|
1999-07-01 12:12:45 +04:00
|
|
|
ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK;
|
|
|
|
if (ip6_auto_flowlabel) {
|
2002-06-09 20:33:36 +04:00
|
|
|
ip6->ip6_flow |=
|
2003-09-06 07:36:30 +04:00
|
|
|
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2003-08-23 02:27:07 +04:00
|
|
|
if (tp && tp->t_inpcb)
|
2003-08-23 01:53:01 +04:00
|
|
|
so = tp->t_inpcb->inp_socket;
|
|
|
|
#ifdef INET6
|
2003-08-23 02:27:07 +04:00
|
|
|
else if (tp && tp->t_in6pcb)
|
2003-08-23 01:53:01 +04:00
|
|
|
so = tp->t_in6pcb->in6p_socket;
|
|
|
|
#endif
|
|
|
|
else
|
|
|
|
so = NULL;
|
1999-01-20 06:39:54 +03:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp != NULL && tp->t_inpcb != NULL) {
|
1999-01-26 11:28:50 +03:00
|
|
|
ro = &tp->t_inpcb->inp_route;
|
|
|
|
#ifdef DIAGNOSTIC
|
1999-07-01 12:12:45 +04:00
|
|
|
if (family != AF_INET)
|
|
|
|
panic("tcp_respond: address family mismatch");
|
|
|
|
if (!in_hosteq(ip->ip_dst, tp->t_inpcb->inp_faddr)) {
|
|
|
|
panic("tcp_respond: ip_dst %x != inp_faddr %x",
|
|
|
|
ntohl(ip->ip_dst.s_addr),
|
1999-01-26 11:28:50 +03:00
|
|
|
ntohl(tp->t_inpcb->inp_faddr.s_addr));
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
1999-01-26 11:28:50 +03:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
else if (tp != NULL && tp->t_in6pcb != NULL) {
|
|
|
|
ro = (struct route *)&tp->t_in6pcb->in6p_route;
|
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (family == AF_INET) {
|
|
|
|
if (!IN6_IS_ADDR_V4MAPPED(&tp->t_in6pcb->in6p_faddr))
|
|
|
|
panic("tcp_respond: not mapped addr");
|
2009-03-18 18:14:29 +03:00
|
|
|
if (memcmp(&ip->ip_dst,
|
2002-09-25 15:19:23 +04:00
|
|
|
&tp->t_in6pcb->in6p_faddr.s6_addr32[3],
|
|
|
|
sizeof(ip->ip_dst)) != 0) {
|
1999-07-01 12:12:45 +04:00
|
|
|
panic("tcp_respond: ip_dst != in6p_faddr");
|
|
|
|
}
|
|
|
|
} else if (family == AF_INET6) {
|
2002-09-25 15:19:23 +04:00
|
|
|
if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
|
|
|
|
&tp->t_in6pcb->in6p_faddr))
|
1999-07-01 12:12:45 +04:00
|
|
|
panic("tcp_respond: ip6_dst != in6p_faddr");
|
|
|
|
} else
|
|
|
|
panic("tcp_respond: address family mismatch");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
2000-10-17 06:57:01 +04:00
|
|
|
else
|
|
|
|
ro = NULL;
|
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
switch (family) {
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
case AF_INET:
|
2000-10-17 06:57:01 +04:00
|
|
|
error = ip_output(m, NULL, ro,
|
2002-05-26 20:05:43 +04:00
|
|
|
(tp && tp->t_mtudisc ? IP_MTUDISC : 0),
|
2003-08-23 01:53:01 +04:00
|
|
|
(struct ip_moptions *)0, so);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
Eliminate address family-specific route caches (struct route, struct
route_in6, struct route_iso), replacing all caches with a struct
route.
The principle benefit of this change is that all of the protocol
families can benefit from route cache-invalidation, which is
necessary for correct routing. Route-cache invalidation fixes an
ancient PR, kern/3508, at long last; it fixes various other PRs,
also.
Discussions with and ideas from Joerg Sonnenberger influenced this
work tremendously. Of course, all design oversights and bugs are
mine.
DETAILS
1 I added to each address family a pool of sockaddrs. I have
introduced routines for allocating, copying, and duplicating,
and freeing sockaddrs:
struct sockaddr *sockaddr_alloc(sa_family_t af, int flags);
struct sockaddr *sockaddr_copy(struct sockaddr *dst,
const struct sockaddr *src);
struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags);
void sockaddr_free(struct sockaddr *sa);
sockaddr_alloc() returns either a sockaddr from the pool belonging
to the specified family, or NULL if the pool is exhausted. The
returned sockaddr has the right size for that family; sa_family
and sa_len fields are initialized to the family and sockaddr
length---e.g., sa_family = AF_INET and sa_len = sizeof(struct
sockaddr_in). sockaddr_free() puts the given sockaddr back into
its family's pool.
sockaddr_dup() and sockaddr_copy() work analogously to strdup()
and strcpy(), respectively. sockaddr_copy() KASSERTs that the
family of the destination and source sockaddrs are alike.
The 'flags' argumet for sockaddr_alloc() and sockaddr_dup() is
passed directly to pool_get(9).
2 I added routines for initializing sockaddrs in each address
family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(),
etc. They are fairly self-explanatory.
3 structs route_in6 and route_iso are no more. All protocol families
use struct route. I have changed the route cache, 'struct route',
so that it does not contain storage space for a sockaddr. Instead,
struct route points to a sockaddr coming from the pool the sockaddr
belongs to. I added a new method to struct route, rtcache_setdst(),
for setting the cache destination:
int rtcache_setdst(struct route *, const struct sockaddr *);
rtcache_setdst() returns 0 on success, or ENOMEM if no memory is
available to create the sockaddr storage.
It is now possible for rtcache_getdst() to return NULL if, say,
rtcache_setdst() failed. I check the return value for NULL
everywhere in the kernel.
4 Each routing domain (struct domain) has a list of live route
caches, dom_rtcache. rtflushall(sa_family_t af) looks up the
domain indicated by 'af', walks the domain's list of route caches
and invalidates each one.
2007-05-03 00:40:22 +04:00
|
|
|
error = ip6_output(m, NULL, ro, 0, NULL, so, NULL);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
#endif
|
1999-07-02 16:45:32 +04:00
|
|
|
default:
|
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
break;
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
1999-01-20 06:39:54 +03:00
|
|
|
|
|
|
|
return (error);
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2003-10-22 06:45:57 +04:00
|
|
|
* Template TCPCB. Rather than zeroing a new TCPCB and initializing
|
|
|
|
* a bunch of members individually, we maintain this template for the
|
|
|
|
* static and mostly-static components of the TCPCB, and copy it into
|
|
|
|
* the new TCPCB instead.
|
1993-03-21 12:45:37 +03:00
|
|
|
*/
|
2003-10-22 06:45:57 +04:00
|
|
|
static struct tcpcb tcpcb_template = {
|
|
|
|
.t_srtt = TCPTV_SRTTBASE,
|
|
|
|
.t_rttmin = TCPTV_MIN,
|
|
|
|
|
|
|
|
.snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT,
|
|
|
|
.snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT,
|
2005-04-05 05:07:17 +04:00
|
|
|
.snd_numholes = 0,
|
2005-02-16 17:59:40 +03:00
|
|
|
|
|
|
|
.t_partialacks = -1,
|
2006-10-19 15:40:51 +04:00
|
|
|
.t_bytes_acked = 0,
|
2003-10-22 06:45:57 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Updates the TCPCB template whenever a parameter that would affect
|
|
|
|
* the template is changed.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
tcp_tcpcb_template(void)
|
|
|
|
{
|
|
|
|
struct tcpcb *tp = &tcpcb_template;
|
2003-10-22 09:55:54 +04:00
|
|
|
int flags;
|
1999-07-01 12:12:45 +04:00
|
|
|
|
1997-11-08 05:35:22 +03:00
|
|
|
tp->t_peermss = tcp_mssdflt;
|
1997-09-23 01:49:55 +04:00
|
|
|
tp->t_ourmss = tcp_mssdflt;
|
1997-11-08 05:35:22 +03:00
|
|
|
tp->t_segsz = tcp_mssdflt;
|
2003-06-29 22:58:26 +04:00
|
|
|
|
2003-10-22 06:45:57 +04:00
|
|
|
flags = 0;
|
1998-04-30 00:43:29 +04:00
|
|
|
if (tcp_do_rfc1323 && tcp_do_win_scale)
|
2003-10-22 06:45:57 +04:00
|
|
|
flags |= TF_REQ_SCALE;
|
1998-04-30 00:43:29 +04:00
|
|
|
if (tcp_do_rfc1323 && tcp_do_timestamps)
|
2003-10-22 06:45:57 +04:00
|
|
|
flags |= TF_REQ_TSTMP;
|
|
|
|
tp->t_flags = flags;
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
|
|
|
* Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
|
|
|
|
* rtt estimate. Set rttvar so that srtt + 2 * rttvar gives
|
|
|
|
* reasonable initial retransmit time.
|
|
|
|
*/
|
1995-06-12 00:39:22 +04:00
|
|
|
tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
|
|
|
|
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
|
1993-03-21 12:45:37 +03:00
|
|
|
TCPTV_MIN, TCPTV_REXMTMAX);
|
2007-06-20 19:29:17 +04:00
|
|
|
|
|
|
|
/* Keep Alive */
|
|
|
|
tp->t_keepinit = tcp_keepinit;
|
|
|
|
tp->t_keepidle = tcp_keepidle;
|
|
|
|
tp->t_keepintvl = tcp_keepintvl;
|
|
|
|
tp->t_keepcnt = tcp_keepcnt;
|
|
|
|
tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl;
|
2003-10-22 06:45:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a new TCP control block, making an
|
|
|
|
* empty reassembly queue and hooking it to the argument
|
|
|
|
* protocol control block.
|
|
|
|
*/
|
2005-02-04 02:50:33 +03:00
|
|
|
/* family selects inpcb, or in6pcb */
|
2003-10-22 06:45:57 +04:00
|
|
|
struct tcpcb *
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_newtcpcb(int family, void *aux)
|
2003-10-22 06:45:57 +04:00
|
|
|
{
|
2007-12-20 23:24:49 +03:00
|
|
|
#ifdef INET6
|
2007-12-20 22:53:29 +03:00
|
|
|
struct rtentry *rt;
|
2007-12-20 23:24:49 +03:00
|
|
|
#endif
|
2003-10-22 06:45:57 +04:00
|
|
|
struct tcpcb *tp;
|
2003-10-22 09:55:54 +04:00
|
|
|
int i;
|
2003-10-22 06:45:57 +04:00
|
|
|
|
|
|
|
/* XXX Consider using a pool_cache for speed. */
|
2006-10-05 21:35:19 +04:00
|
|
|
tp = pool_get(&tcpcb_pool, PR_NOWAIT); /* splsoftnet via tcp_usrreq */
|
2003-10-22 06:45:57 +04:00
|
|
|
if (tp == NULL)
|
|
|
|
return (NULL);
|
|
|
|
memcpy(tp, &tcpcb_template, sizeof(*tp));
|
|
|
|
TAILQ_INIT(&tp->segq);
|
|
|
|
TAILQ_INIT(&tp->timeq);
|
|
|
|
tp->t_family = family; /* may be overridden later on */
|
Commit TCP SACK patches from Kentaro A. Karahone's patch at:
http://www.sigusr1.org/~kurahone/tcp-sack-netbsd-02152005.diff.gz
Fixes in that patch for pre-existing TCP pcb initializations were already
committed to NetBSD-current, so are not included in this commit.
The SACK patch has been observed to correctly negotiate and respond,
to SACKs in wide-area traffic.
There are two indepenently-observed, as-yet-unresolved anomalies:
First, seeing unexplained delays between in fast retransmission
(potentially explainable by an 0.2sec RTT between adjacent
ethernet/wifi NICs); and second, peculiar and unepxlained TCP
retransmits observed over an ath0 card.
After discussion with several interested developers, I'm committing
this now, as-is, for more eyes to use and look over. Current hypothesis
is that the anomalies above may in fact be due to link/level (hardware,
driver, HAL, firmware) abberations in the test setup, affecting both
Kentaro's wired-Ethernet NIC and in my two (different) WiFi NICs.
2005-02-28 19:20:59 +03:00
|
|
|
TAILQ_INIT(&tp->snd_holes);
|
2003-10-22 06:45:57 +04:00
|
|
|
LIST_INIT(&tp->t_sc); /* XXX can template this */
|
|
|
|
|
2003-10-27 19:52:01 +03:00
|
|
|
/* Don't sweat this loop; hopefully the compiler will unroll it. */
|
2007-07-10 00:51:58 +04:00
|
|
|
for (i = 0; i < TCPT_NTIMERS; i++) {
|
2008-04-24 15:38:36 +04:00
|
|
|
callout_init(&tp->t_timer[i], CALLOUT_MPSAFE);
|
2003-10-22 09:55:54 +04:00
|
|
|
TCP_TIMER_INIT(tp, i);
|
2007-07-10 00:51:58 +04:00
|
|
|
}
|
2008-04-24 15:38:36 +04:00
|
|
|
callout_init(&tp->t_delack_ch, CALLOUT_MPSAFE);
|
2003-10-22 09:55:54 +04:00
|
|
|
|
2003-10-22 06:45:57 +04:00
|
|
|
switch (family) {
|
|
|
|
case AF_INET:
|
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
struct inpcb *inp = (struct inpcb *)aux;
|
2003-10-22 06:45:57 +04:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
inp->inp_ip.ip_ttl = ip_defttl;
|
2007-03-04 08:59:00 +03:00
|
|
|
inp->inp_ppcb = (void *)tp;
|
2003-10-22 06:45:57 +04:00
|
|
|
|
|
|
|
tp->t_inpcb = inp;
|
|
|
|
tp->t_mtudisc = ip_mtudisc;
|
|
|
|
break;
|
|
|
|
}
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2003-10-22 06:45:57 +04:00
|
|
|
case AF_INET6:
|
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
struct in6pcb *in6p = (struct in6pcb *)aux;
|
2003-10-22 06:45:57 +04:00
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
in6p->in6p_ip6.ip6_hlim = in6_selecthlim(in6p,
|
2008-01-14 07:19:09 +03:00
|
|
|
(rt = rtcache_validate(&in6p->in6p_route)) != NULL
|
2007-12-20 22:53:29 +03:00
|
|
|
? rt->rt_ifp
|
|
|
|
: NULL);
|
2007-03-04 08:59:00 +03:00
|
|
|
in6p->in6p_ppcb = (void *)tp;
|
2003-10-22 06:45:57 +04:00
|
|
|
|
|
|
|
tp->t_in6pcb = in6p;
|
|
|
|
/* for IPv6, always try to run path MTU discovery */
|
|
|
|
tp->t_mtudisc = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
2007-07-10 00:51:58 +04:00
|
|
|
for (i = 0; i < TCPT_NTIMERS; i++)
|
|
|
|
callout_destroy(&tp->t_timer[i]);
|
|
|
|
callout_destroy(&tp->t_delack_ch);
|
2006-10-05 21:35:19 +04:00
|
|
|
pool_put(&tcpcb_pool, tp); /* splsoftnet via tcp_usrreq */
|
2003-10-22 06:45:57 +04:00
|
|
|
return (NULL);
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize our timebase. When we send timestamps, we take
|
|
|
|
* the delta from tcp_now -- this means each connection always
|
2008-02-05 12:38:47 +03:00
|
|
|
* gets a timebase of 1, which makes it, among other things,
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
* more difficult to determine how long a system has been up,
|
|
|
|
* and thus how many TCP sequence increments have occurred.
|
2008-02-05 12:38:47 +03:00
|
|
|
*
|
|
|
|
* We start with 1, because 0 doesn't work with linux, which
|
|
|
|
* considers timestamp 0 in a SYN packet as a bug and disables
|
|
|
|
* timestamps.
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
*/
|
2008-02-05 12:38:47 +03:00
|
|
|
tp->ts_timebase = tcp_now - 1;
|
2006-10-09 20:27:07 +04:00
|
|
|
|
2008-02-29 10:39:17 +03:00
|
|
|
tcp_congctl_select(tp, tcp_congctl_global_name);
|
2007-06-20 19:29:17 +04:00
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
return (tp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Drop a TCP connection, reporting
|
|
|
|
* the specified error. If connection is synchronized,
|
|
|
|
* then send a RST to peer.
|
|
|
|
*/
|
|
|
|
struct tcpcb *
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_drop(struct tcpcb *tp, int errno)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2000-10-29 09:33:59 +03:00
|
|
|
struct socket *so = NULL;
|
1999-07-01 12:12:45 +04:00
|
|
|
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (tp->t_inpcb && tp->t_in6pcb)
|
|
|
|
panic("tcp_drop: both t_inpcb and t_in6pcb are set");
|
|
|
|
#endif
|
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp->t_inpcb)
|
|
|
|
so = tp->t_inpcb->inp_socket;
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-10-17 07:06:42 +04:00
|
|
|
if (tp->t_in6pcb)
|
1999-07-01 12:12:45 +04:00
|
|
|
so = tp->t_in6pcb->in6p_socket;
|
|
|
|
#endif
|
2000-10-29 09:33:59 +03:00
|
|
|
if (!so)
|
|
|
|
return NULL;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
|
|
|
if (TCPS_HAVERCVDSYN(tp->t_state)) {
|
|
|
|
tp->t_state = TCPS_CLOSED;
|
|
|
|
(void) tcp_output(tp);
|
2008-04-12 09:58:22 +04:00
|
|
|
TCP_STATINC(TCP_STAT_DROPS);
|
1993-03-21 12:45:37 +03:00
|
|
|
} else
|
2008-04-12 09:58:22 +04:00
|
|
|
TCP_STATINC(TCP_STAT_CONNDROPS);
|
1993-03-21 12:45:37 +03:00
|
|
|
if (errno == ETIMEDOUT && tp->t_softerror)
|
|
|
|
errno = tp->t_softerror;
|
|
|
|
so->so_error = errno;
|
|
|
|
return (tcp_close(tp));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Close a TCP control block:
|
|
|
|
* discard all space held by the tcp
|
|
|
|
* discard internet protocol block
|
|
|
|
* wake up any sleepers
|
|
|
|
*/
|
|
|
|
struct tcpcb *
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_close(struct tcpcb *tp)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
struct inpcb *inp;
|
|
|
|
#ifdef INET6
|
|
|
|
struct in6pcb *in6p;
|
|
|
|
#endif
|
|
|
|
struct socket *so;
|
1993-03-21 12:45:37 +03:00
|
|
|
#ifdef RTV_RTT
|
2000-03-30 16:51:13 +04:00
|
|
|
struct rtentry *rt;
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif
|
|
|
|
struct route *ro;
|
2007-07-10 00:51:58 +04:00
|
|
|
int j;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
inp = tp->t_inpcb;
|
|
|
|
#ifdef INET6
|
|
|
|
in6p = tp->t_in6pcb;
|
|
|
|
#endif
|
|
|
|
so = NULL;
|
|
|
|
ro = NULL;
|
|
|
|
if (inp) {
|
|
|
|
so = inp->inp_socket;
|
|
|
|
ro = &inp->inp_route;
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p) {
|
|
|
|
so = in6p->in6p_socket;
|
|
|
|
ro = (struct route *)&in6p->in6p_route;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef RTV_RTT
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
|
|
|
* If we sent enough data to get some meaningful characteristics,
|
2002-06-09 20:33:36 +04:00
|
|
|
* save them in the routing entry. 'Enough' is arbitrarily
|
1993-03-21 12:45:37 +03:00
|
|
|
* defined as the sendpipesize (default 4K) * 16. This would
|
|
|
|
* give us 16 rtt samples assuming we only get one sample per
|
|
|
|
* window (the usual case on a long haul net). 16 samples is
|
|
|
|
* enough for the srtt filter to converge to within 5% of the correct
|
|
|
|
* value; fewer samples and we could save a very bogus rtt.
|
|
|
|
*
|
|
|
|
* Don't update the default route's characteristics and don't
|
|
|
|
* update anything that the user "locked".
|
|
|
|
*/
|
|
|
|
if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
|
2008-01-14 07:19:09 +03:00
|
|
|
ro && (rt = rtcache_validate(ro)) != NULL &&
|
Take steps to hide the radix_node implementation of the forwarding table
from the forwarding table's users:
Introduce rt_walktree() for walking the routing table and
applying a function to each rtentry. Replace most
rn_walktree() calls with it.
Use rt_getkey()/rt_setkey() to get/set a route's destination.
Keep a pointer to the sockaddr key in the rtentry, so that
rtentry users do not have to grovel in the radix_node for
the key.
Add a RTM_GET method to rtrequest. Use that instead of
radix_node lookups in, e.g., carp(4).
Add sys/net/link_proto.c, which supplies sockaddr routines for
link-layer socket addresses (sockaddr_dl).
Cosmetic:
Constify. KNF. Stop open-coding LIST_FOREACH, TAILQ_FOREACH,
et cetera. Use NULL instead of 0 for null pointers. Use
__arraycount(). Reduce gratuitous parenthesization.
Stop using variadic arguments for rip6_output(), it is
unnecessary.
Remove the unnecessary rtentry member rt_genmask and the
code to maintain it, since nothing actually used it.
Make rt_maskedcopy() easier to read by using meaningful variable
names.
Extract a subroutine intern_netmask() for looking up a netmask in
the masks table.
Start converting backslash-ridden IPv6 macros in
sys/netinet6/in6_var.h into inline subroutines that one
can read without special eyeglasses.
One functional change: when the kernel serves an RTM_GET, RTM_LOCK,
or RTM_CHANGE request, it applies the netmask (if supplied) to a
destination before searching for it in the forwarding table.
I have changed sys/netinet/ip_carp.c, carp_setroute(), to remove
the unlawful radix_node knowledge.
Apart from the changes to carp(4), netiso, ATM, and strip(4), I
have run the changes on three nodes in my wireless routing testbed,
which involves IPv4 + IPv6 dynamic routing acrobatics, and it's
working beautifully so far.
2007-07-20 00:48:52 +04:00
|
|
|
!in_nullhost(satocsin(rt_getkey(rt))->sin_addr)) {
|
2000-03-30 16:51:13 +04:00
|
|
|
u_long i = 0;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
|
|
|
if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
|
|
|
|
i = tp->t_srtt *
|
1996-12-10 21:20:19 +03:00
|
|
|
((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));
|
1993-03-21 12:45:37 +03:00
|
|
|
if (rt->rt_rmx.rmx_rtt && i)
|
|
|
|
/*
|
|
|
|
* filter this update to half the old & half
|
|
|
|
* the new values, converting scale.
|
|
|
|
* See route.h and tcp_var.h for a
|
|
|
|
* description of the scaling constants.
|
|
|
|
*/
|
|
|
|
rt->rt_rmx.rmx_rtt =
|
|
|
|
(rt->rt_rmx.rmx_rtt + i) / 2;
|
|
|
|
else
|
|
|
|
rt->rt_rmx.rmx_rtt = i;
|
|
|
|
}
|
|
|
|
if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
|
|
|
|
i = tp->t_rttvar *
|
1996-12-10 21:20:19 +03:00
|
|
|
((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTTVAR_SHIFT + 2));
|
1993-03-21 12:45:37 +03:00
|
|
|
if (rt->rt_rmx.rmx_rttvar && i)
|
|
|
|
rt->rt_rmx.rmx_rttvar =
|
|
|
|
(rt->rt_rmx.rmx_rttvar + i) / 2;
|
|
|
|
else
|
|
|
|
rt->rt_rmx.rmx_rttvar = i;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* update the pipelimit (ssthresh) if it has been updated
|
|
|
|
* already or if a pipesize was specified & the threshhold
|
|
|
|
* got below half the pipesize. I.e., wait for bad news
|
|
|
|
* before we start updating, then update on both good
|
|
|
|
* and bad news.
|
|
|
|
*/
|
1996-02-14 02:40:59 +03:00
|
|
|
if (((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
|
|
|
|
(i = tp->snd_ssthresh) && rt->rt_rmx.rmx_ssthresh) ||
|
1993-03-21 12:45:37 +03:00
|
|
|
i < (rt->rt_rmx.rmx_sendpipe / 2)) {
|
|
|
|
/*
|
|
|
|
* convert the limit from user data bytes to
|
|
|
|
* packets then to packet data bytes.
|
|
|
|
*/
|
1997-11-08 05:35:22 +03:00
|
|
|
i = (i + tp->t_segsz / 2) / tp->t_segsz;
|
1993-03-21 12:45:37 +03:00
|
|
|
if (i < 2)
|
|
|
|
i = 2;
|
1997-11-08 05:35:22 +03:00
|
|
|
i *= (u_long)(tp->t_segsz + sizeof (struct tcpiphdr));
|
1993-03-21 12:45:37 +03:00
|
|
|
if (rt->rt_rmx.rmx_ssthresh)
|
|
|
|
rt->rt_rmx.rmx_ssthresh =
|
|
|
|
(rt->rt_rmx.rmx_ssthresh + i) / 2;
|
|
|
|
else
|
|
|
|
rt->rt_rmx.rmx_ssthresh = i;
|
|
|
|
}
|
|
|
|
}
|
1994-01-11 02:27:39 +03:00
|
|
|
#endif /* RTV_RTT */
|
1993-03-21 12:45:37 +03:00
|
|
|
/* free the reassembly queue, if any */
|
1998-12-19 00:38:02 +03:00
|
|
|
TCP_REASS_LOCK(tp);
|
1997-12-10 04:58:07 +03:00
|
|
|
(void) tcp_freeq(tp);
|
1998-12-19 00:38:02 +03:00
|
|
|
TCP_REASS_UNLOCK(tp);
|
|
|
|
|
Commit TCP SACK patches from Kentaro A. Karahone's patch at:
http://www.sigusr1.org/~kurahone/tcp-sack-netbsd-02152005.diff.gz
Fixes in that patch for pre-existing TCP pcb initializations were already
committed to NetBSD-current, so are not included in this commit.
The SACK patch has been observed to correctly negotiate and respond,
to SACKs in wide-area traffic.
There are two indepenently-observed, as-yet-unresolved anomalies:
First, seeing unexplained delays between in fast retransmission
(potentially explainable by an 0.2sec RTT between adjacent
ethernet/wifi NICs); and second, peculiar and unepxlained TCP
retransmits observed over an ath0 card.
After discussion with several interested developers, I'm committing
this now, as-is, for more eyes to use and look over. Current hypothesis
is that the anomalies above may in fact be due to link/level (hardware,
driver, HAL, firmware) abberations in the test setup, affecting both
Kentaro's wired-Ethernet NIC and in my two (different) WiFi NICs.
2005-02-28 19:20:59 +03:00
|
|
|
/* free the SACK holes list. */
|
2008-10-10 14:23:34 +04:00
|
|
|
tcp_free_sackholes(tp);
|
2008-02-29 10:39:17 +03:00
|
|
|
tcp_congctl_release(tp);
|
1999-08-25 19:23:12 +04:00
|
|
|
syn_cache_cleanup(tp);
|
1997-12-10 04:58:07 +03:00
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp->t_template) {
|
|
|
|
m_free(tp->t_template);
|
|
|
|
tp->t_template = NULL;
|
|
|
|
}
|
2003-07-20 20:35:07 +04:00
|
|
|
|
2008-10-10 14:23:34 +04:00
|
|
|
/*
|
|
|
|
* Detaching the pcb will unlock the socket/tcpcb, and stopping
|
|
|
|
* the timers can also drop the lock. We need to prevent access
|
|
|
|
* to the tcpcb as it's half torn down. Flag the pcb as dead
|
|
|
|
* (prevents access by timers) and only then detach it.
|
|
|
|
*/
|
|
|
|
tp->t_flags |= TF_DEAD;
|
1999-07-01 12:12:45 +04:00
|
|
|
if (inp) {
|
|
|
|
inp->inp_ppcb = 0;
|
|
|
|
soisdisconnected(so);
|
|
|
|
in_pcbdetach(inp);
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p) {
|
|
|
|
in6p->in6p_ppcb = 0;
|
|
|
|
soisdisconnected(so);
|
|
|
|
in6_pcbdetach(in6p);
|
|
|
|
}
|
|
|
|
#endif
|
2008-10-10 14:23:34 +04:00
|
|
|
/*
|
|
|
|
* pcb is no longer visble elsewhere, so we can safely release
|
|
|
|
* the lock in callout_halt() if needed.
|
|
|
|
*/
|
2008-04-12 09:58:22 +04:00
|
|
|
TCP_STATINC(TCP_STAT_CLOSED);
|
2008-10-10 14:23:34 +04:00
|
|
|
for (j = 0; j < TCPT_NTIMERS; j++) {
|
|
|
|
callout_halt(&tp->t_timer[j], softnet_lock);
|
|
|
|
callout_destroy(&tp->t_timer[j]);
|
|
|
|
}
|
|
|
|
callout_halt(&tp->t_delack_ch, softnet_lock);
|
|
|
|
callout_destroy(&tp->t_delack_ch);
|
|
|
|
pool_put(&tcpcb_pool, tp);
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
return ((struct tcpcb *)0);
|
|
|
|
}
|
|
|
|
|
1997-12-10 04:58:07 +03:00
|
|
|
int
|
2008-02-27 22:41:51 +03:00
|
|
|
tcp_freeq(struct tcpcb *tp)
|
1997-12-10 04:58:07 +03:00
|
|
|
{
|
2000-03-30 16:51:13 +04:00
|
|
|
struct ipqent *qe;
|
1997-12-10 04:58:07 +03:00
|
|
|
int rv = 0;
|
1998-04-30 00:43:29 +04:00
|
|
|
#ifdef TCPREASS_DEBUG
|
|
|
|
int i = 0;
|
|
|
|
#endif
|
1997-12-10 04:58:07 +03:00
|
|
|
|
1998-12-19 00:38:02 +03:00
|
|
|
TCP_REASS_LOCK_CHECK(tp);
|
|
|
|
|
2002-05-07 06:59:38 +04:00
|
|
|
while ((qe = TAILQ_FIRST(&tp->segq)) != NULL) {
|
1998-04-30 00:43:29 +04:00
|
|
|
#ifdef TCPREASS_DEBUG
|
|
|
|
printf("tcp_freeq[%p,%d]: %u:%u(%u) 0x%02x\n",
|
|
|
|
tp, i++, qe->ipqe_seq, qe->ipqe_seq + qe->ipqe_len,
|
|
|
|
qe->ipqe_len, qe->ipqe_flags & (TH_SYN|TH_FIN|TH_RST));
|
|
|
|
#endif
|
2002-05-07 06:59:38 +04:00
|
|
|
TAILQ_REMOVE(&tp->segq, qe, ipqe_q);
|
|
|
|
TAILQ_REMOVE(&tp->timeq, qe, ipqe_timeq);
|
1997-12-10 04:58:07 +03:00
|
|
|
m_freem(qe->ipqe_m);
|
2005-03-30 00:10:16 +04:00
|
|
|
tcpipqent_free(qe);
|
1997-12-10 04:58:07 +03:00
|
|
|
rv = 1;
|
|
|
|
}
|
2005-03-16 03:39:56 +03:00
|
|
|
tp->t_segqlen = 0;
|
|
|
|
KASSERT(TAILQ_EMPTY(&tp->timeq));
|
1997-12-10 04:58:07 +03:00
|
|
|
return (rv);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Protocol drain routine. Called when memory is in short supply.
|
2008-05-02 17:40:32 +04:00
|
|
|
* Don't acquire softnet_lock as can be called from hardware
|
|
|
|
* interrupt handler.
|
1997-12-10 04:58:07 +03:00
|
|
|
*/
|
1994-01-09 02:07:16 +03:00
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_drain(void)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2003-09-04 13:16:57 +04:00
|
|
|
struct inpcb_hdr *inph;
|
2000-03-30 16:51:13 +04:00
|
|
|
struct tcpcb *tp;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2008-04-24 15:38:36 +04:00
|
|
|
KERNEL_LOCK(1, NULL);
|
|
|
|
|
1997-12-10 04:58:07 +03:00
|
|
|
/*
|
|
|
|
* Free the sequence queue of all TCP connections.
|
|
|
|
*/
|
2003-09-04 13:16:57 +04:00
|
|
|
CIRCLEQ_FOREACH(inph, &tcbtable.inpt_queue, inph_queue) {
|
|
|
|
switch (inph->inph_af) {
|
|
|
|
case AF_INET:
|
|
|
|
tp = intotcpcb((struct inpcb *)inph);
|
|
|
|
break;
|
2002-03-15 12:25:41 +03:00
|
|
|
#ifdef INET6
|
2003-09-04 13:16:57 +04:00
|
|
|
case AF_INET6:
|
|
|
|
tp = in6totcpcb((struct in6pcb *)inph);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
tp = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (tp != NULL) {
|
2002-03-15 12:25:41 +03:00
|
|
|
/*
|
|
|
|
* We may be called from a device's interrupt
|
|
|
|
* context. If the tcpcb is already busy,
|
|
|
|
* just bail out now.
|
|
|
|
*/
|
|
|
|
if (tcp_reass_lock_try(tp) == 0)
|
|
|
|
continue;
|
|
|
|
if (tcp_freeq(tp))
|
2008-04-12 09:58:22 +04:00
|
|
|
TCP_STATINC(TCP_STAT_CONNSDRAINED);
|
2002-03-15 12:25:41 +03:00
|
|
|
TCP_REASS_UNLOCK(tp);
|
|
|
|
}
|
|
|
|
}
|
2008-04-24 15:38:36 +04:00
|
|
|
|
|
|
|
KERNEL_UNLOCK_ONE(NULL);
|
2002-03-15 12:25:41 +03:00
|
|
|
}
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
|
|
|
* Notify a tcp user of an asynchronous error;
|
|
|
|
* store error as soft error, but wake up user
|
|
|
|
* (for now, won't do anything until can select for soft error).
|
|
|
|
*/
|
1994-01-09 02:07:16 +03:00
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_notify(struct inpcb *inp, int error)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2000-03-30 16:51:13 +04:00
|
|
|
struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
|
|
|
|
struct socket *so = inp->inp_socket;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1994-05-13 10:02:48 +04:00
|
|
|
/*
|
|
|
|
* Ignore some errors if we are hooked up.
|
|
|
|
* If connection hasn't completed, has retransmitted several times,
|
|
|
|
* and receives a second error, give up now. This is better
|
|
|
|
* than waiting a long time to establish a connection that
|
|
|
|
* can never complete.
|
|
|
|
*/
|
|
|
|
if (tp->t_state == TCPS_ESTABLISHED &&
|
|
|
|
(error == EHOSTUNREACH || error == ENETUNREACH ||
|
|
|
|
error == EHOSTDOWN)) {
|
|
|
|
return;
|
1994-10-14 19:01:48 +03:00
|
|
|
} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
|
|
|
|
tp->t_rxtshift > 3 && tp->t_softerror)
|
1994-05-13 10:02:48 +04:00
|
|
|
so->so_error = error;
|
2002-06-09 20:33:36 +04:00
|
|
|
else
|
1994-05-13 10:02:48 +04:00
|
|
|
tp->t_softerror = error;
|
2008-04-24 15:38:36 +04:00
|
|
|
cv_broadcast(&so->so_cv);
|
1994-05-13 10:02:48 +04:00
|
|
|
sorwakeup(so);
|
|
|
|
sowwakeup(so);
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2000-10-20 00:22:59 +04:00
|
|
|
#ifdef INET6
|
1999-07-22 16:56:56 +04:00
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp6_notify(struct in6pcb *in6p, int error)
|
1999-07-22 16:56:56 +04:00
|
|
|
{
|
2000-03-30 16:51:13 +04:00
|
|
|
struct tcpcb *tp = (struct tcpcb *)in6p->in6p_ppcb;
|
|
|
|
struct socket *so = in6p->in6p_socket;
|
1999-07-22 16:56:56 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Ignore some errors if we are hooked up.
|
|
|
|
* If connection hasn't completed, has retransmitted several times,
|
|
|
|
* and receives a second error, give up now. This is better
|
|
|
|
* than waiting a long time to establish a connection that
|
|
|
|
* can never complete.
|
|
|
|
*/
|
|
|
|
if (tp->t_state == TCPS_ESTABLISHED &&
|
|
|
|
(error == EHOSTUNREACH || error == ENETUNREACH ||
|
|
|
|
error == EHOSTDOWN)) {
|
|
|
|
return;
|
|
|
|
} else if (TCPS_HAVEESTABLISHED(tp->t_state) == 0 &&
|
|
|
|
tp->t_rxtshift > 3 && tp->t_softerror)
|
|
|
|
so->so_error = error;
|
2002-06-09 20:33:36 +04:00
|
|
|
else
|
1999-07-22 16:56:56 +04:00
|
|
|
tp->t_softerror = error;
|
2008-04-24 15:38:36 +04:00
|
|
|
cv_broadcast(&so->so_cv);
|
1999-07-22 16:56:56 +04:00
|
|
|
sorwakeup(so);
|
|
|
|
sowwakeup(so);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2000-10-20 00:22:59 +04:00
|
|
|
#ifdef INET6
|
2008-04-24 15:38:36 +04:00
|
|
|
void *
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
tcp6_ctlinput(int cmd, const struct sockaddr *sa, void *d)
|
1999-07-01 12:12:45 +04:00
|
|
|
{
|
1999-07-22 16:56:56 +04:00
|
|
|
struct tcphdr th;
|
2005-02-03 00:41:55 +03:00
|
|
|
void (*notify)(struct in6pcb *, int) = tcp6_notify;
|
1999-07-22 16:56:56 +04:00
|
|
|
int nmatch;
|
2000-03-30 16:51:13 +04:00
|
|
|
struct ip6_hdr *ip6;
|
2001-02-11 09:49:49 +03:00
|
|
|
const struct sockaddr_in6 *sa6_src = NULL;
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
const struct sockaddr_in6 *sa6 = (const struct sockaddr_in6 *)sa;
|
1999-12-13 18:17:17 +03:00
|
|
|
struct mbuf *m;
|
|
|
|
int off;
|
1999-07-22 16:56:56 +04:00
|
|
|
|
1999-08-09 14:55:29 +04:00
|
|
|
if (sa->sa_family != AF_INET6 ||
|
|
|
|
sa->sa_len != sizeof(struct sockaddr_in6))
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
1999-12-13 18:17:17 +03:00
|
|
|
if ((unsigned)cmd >= PRC_NCMDS)
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
1999-12-13 18:17:17 +03:00
|
|
|
else if (cmd == PRC_QUENCH) {
|
2005-07-19 21:00:02 +04:00
|
|
|
/*
|
|
|
|
* Don't honor ICMP Source Quench messages meant for
|
|
|
|
* TCP connections.
|
|
|
|
*/
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
1999-12-13 18:17:17 +03:00
|
|
|
} else if (PRC_IS_REDIRECT(cmd))
|
|
|
|
notify = in6_rtchange, d = NULL;
|
1999-07-22 16:56:56 +04:00
|
|
|
else if (cmd == PRC_MSGSIZE)
|
2000-10-19 01:14:12 +04:00
|
|
|
; /* special code is present, see below */
|
1999-12-13 18:17:17 +03:00
|
|
|
else if (cmd == PRC_HOSTDEAD)
|
|
|
|
d = NULL;
|
|
|
|
else if (inet6ctlerrmap[cmd] == 0)
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
1999-07-31 22:41:15 +04:00
|
|
|
|
1999-12-13 18:17:17 +03:00
|
|
|
/* if the parameter is from icmp6, decode it. */
|
|
|
|
if (d != NULL) {
|
|
|
|
struct ip6ctlparam *ip6cp = (struct ip6ctlparam *)d;
|
|
|
|
m = ip6cp->ip6c_m;
|
|
|
|
ip6 = ip6cp->ip6c_ip6;
|
|
|
|
off = ip6cp->ip6c_off;
|
2001-02-11 09:49:49 +03:00
|
|
|
sa6_src = ip6cp->ip6c_src;
|
1999-12-13 18:17:17 +03:00
|
|
|
} else {
|
|
|
|
m = NULL;
|
|
|
|
ip6 = NULL;
|
2001-02-11 09:49:49 +03:00
|
|
|
sa6_src = &sa6_any;
|
2003-10-25 12:13:28 +04:00
|
|
|
off = 0;
|
1999-12-13 18:17:17 +03:00
|
|
|
}
|
|
|
|
|
1999-07-22 16:56:56 +04:00
|
|
|
if (ip6) {
|
|
|
|
/*
|
|
|
|
* XXX: We assume that when ip6 is non NULL,
|
|
|
|
* M and OFF are valid.
|
|
|
|
*/
|
|
|
|
|
2000-10-13 21:53:44 +04:00
|
|
|
/* check if we can safely examine src and dst ports */
|
2001-05-24 11:22:27 +04:00
|
|
|
if (m->m_pkthdr.len < off + sizeof(th)) {
|
|
|
|
if (cmd == PRC_MSGSIZE)
|
|
|
|
icmp6_mtudisc_update((struct ip6ctlparam *)d, 0);
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
2001-05-24 11:22:27 +04:00
|
|
|
}
|
2000-10-13 21:53:44 +04:00
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&th, 0, sizeof(th));
|
2007-03-04 08:59:00 +03:00
|
|
|
m_copydata(m, off, sizeof(th), (void *)&th);
|
2000-10-19 01:14:12 +04:00
|
|
|
|
|
|
|
if (cmd == PRC_MSGSIZE) {
|
2000-12-09 04:29:45 +03:00
|
|
|
int valid = 0;
|
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
/*
|
|
|
|
* Check to see if we have a valid TCP connection
|
|
|
|
* corresponding to the address in the ICMPv6 message
|
|
|
|
* payload.
|
|
|
|
*/
|
2003-09-04 13:16:57 +04:00
|
|
|
if (in6_pcblookup_connect(&tcbtable, &sa6->sin6_addr,
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
th.th_dport,
|
|
|
|
(const struct in6_addr *)&sa6_src->sin6_addr,
|
2001-02-11 09:49:49 +03:00
|
|
|
th.th_sport, 0))
|
2000-12-09 04:29:45 +03:00
|
|
|
valid++;
|
2000-10-19 01:14:12 +04:00
|
|
|
|
|
|
|
/*
|
2001-02-11 09:49:49 +03:00
|
|
|
* Depending on the value of "valid" and routing table
|
|
|
|
* size (mtudisc_{hi,lo}wat), we will:
|
|
|
|
* - recalcurate the new MTU and create the
|
|
|
|
* corresponding routing entry, or
|
|
|
|
* - ignore the MTU change notification.
|
2000-10-19 01:14:12 +04:00
|
|
|
*/
|
2000-12-09 04:29:45 +03:00
|
|
|
icmp6_mtudisc_update((struct ip6ctlparam *)d, valid);
|
2000-10-19 01:14:12 +04:00
|
|
|
|
2001-02-11 09:49:49 +03:00
|
|
|
/*
|
|
|
|
* no need to call in6_pcbnotify, it should have been
|
|
|
|
* called via callback if necessary
|
|
|
|
*/
|
2008-04-24 15:38:36 +04:00
|
|
|
return NULL;
|
2000-10-19 01:14:12 +04:00
|
|
|
}
|
|
|
|
|
2003-09-04 13:16:57 +04:00
|
|
|
nmatch = in6_pcbnotify(&tcbtable, sa, th.th_dport,
|
2005-05-30 01:41:23 +04:00
|
|
|
(const struct sockaddr *)sa6_src, th.th_sport, cmd, NULL, notify);
|
1999-07-22 16:56:56 +04:00
|
|
|
if (nmatch == 0 && syn_cache_count &&
|
|
|
|
(inet6ctlerrmap[cmd] == EHOSTUNREACH ||
|
|
|
|
inet6ctlerrmap[cmd] == ENETUNREACH ||
|
2001-02-11 09:49:49 +03:00
|
|
|
inet6ctlerrmap[cmd] == EHOSTDOWN))
|
2005-05-30 01:41:23 +04:00
|
|
|
syn_cache_unreach((const struct sockaddr *)sa6_src,
|
2001-02-11 09:49:49 +03:00
|
|
|
sa, &th);
|
1999-07-22 16:56:56 +04:00
|
|
|
} else {
|
2003-09-04 13:16:57 +04:00
|
|
|
(void) in6_pcbnotify(&tcbtable, sa, 0,
|
2005-05-30 01:41:23 +04:00
|
|
|
(const struct sockaddr *)sa6_src, 0, cmd, NULL, notify);
|
1999-07-22 16:56:56 +04:00
|
|
|
}
|
2008-04-24 15:38:36 +04:00
|
|
|
|
|
|
|
return NULL;
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
/* assumes that ip header and tcp header are contiguous on mbuf */
|
1996-02-14 02:40:59 +03:00
|
|
|
void *
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
tcp_ctlinput(int cmd, const struct sockaddr *sa, void *v)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2000-03-30 16:51:13 +04:00
|
|
|
struct ip *ip = v;
|
|
|
|
struct tcphdr *th;
|
2000-10-18 21:09:14 +04:00
|
|
|
struct icmp *icp;
|
2001-11-04 16:42:27 +03:00
|
|
|
extern const int inetctlerrmap[];
|
2005-02-03 00:41:55 +03:00
|
|
|
void (*notify)(struct inpcb *, int) = tcp_notify;
|
1995-06-12 10:48:54 +04:00
|
|
|
int errno;
|
1997-07-24 01:26:40 +04:00
|
|
|
int nmatch;
|
2005-07-19 21:00:02 +04:00
|
|
|
struct tcpcb *tp;
|
|
|
|
u_int mtu;
|
|
|
|
tcp_seq seq;
|
|
|
|
struct inpcb *inp;
|
2002-07-02 00:51:25 +04:00
|
|
|
#ifdef INET6
|
2005-07-19 21:00:02 +04:00
|
|
|
struct in6pcb *in6p;
|
2002-07-02 00:51:25 +04:00
|
|
|
struct in6_addr src6, dst6;
|
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1999-08-09 14:55:29 +04:00
|
|
|
if (sa->sa_family != AF_INET ||
|
|
|
|
sa->sa_len != sizeof(struct sockaddr_in))
|
|
|
|
return NULL;
|
1995-06-12 10:46:34 +04:00
|
|
|
if ((unsigned)cmd >= PRC_NCMDS)
|
1996-02-14 02:40:59 +03:00
|
|
|
return NULL;
|
1995-06-12 10:46:34 +04:00
|
|
|
errno = inetctlerrmap[cmd];
|
1995-06-12 10:24:21 +04:00
|
|
|
if (cmd == PRC_QUENCH)
|
2005-07-19 21:00:02 +04:00
|
|
|
/*
|
|
|
|
* Don't honor ICMP Source Quench messages meant for
|
|
|
|
* TCP connections.
|
|
|
|
*/
|
|
|
|
return NULL;
|
1995-06-12 10:24:21 +04:00
|
|
|
else if (PRC_IS_REDIRECT(cmd))
|
|
|
|
notify = in_rtchange, ip = 0;
|
2002-05-26 20:05:43 +04:00
|
|
|
else if (cmd == PRC_MSGSIZE && ip && ip->ip_v == 4) {
|
2000-10-18 21:09:14 +04:00
|
|
|
/*
|
|
|
|
* Check to see if we have a valid TCP connection
|
|
|
|
* corresponding to the address in the ICMP message
|
|
|
|
* payload.
|
2001-05-24 11:22:27 +04:00
|
|
|
*
|
|
|
|
* Boundary check is made in icmp_input(), with ICMP_ADVLENMIN.
|
2000-10-18 21:09:14 +04:00
|
|
|
*/
|
2007-03-04 08:59:00 +03:00
|
|
|
th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
|
2002-07-02 00:51:25 +04:00
|
|
|
#ifdef INET6
|
|
|
|
memset(&src6, 0, sizeof(src6));
|
|
|
|
memset(&dst6, 0, sizeof(dst6));
|
|
|
|
src6.s6_addr16[5] = dst6.s6_addr16[5] = 0xffff;
|
|
|
|
memcpy(&src6.s6_addr32[3], &ip->ip_src, sizeof(struct in_addr));
|
|
|
|
memcpy(&dst6.s6_addr32[3], &ip->ip_dst, sizeof(struct in_addr));
|
|
|
|
#endif
|
2005-07-19 21:00:02 +04:00
|
|
|
if ((inp = in_pcblookup_connect(&tcbtable, ip->ip_dst,
|
|
|
|
th->th_dport, ip->ip_src, th->th_sport)) != NULL)
|
2005-07-20 12:05:43 +04:00
|
|
|
#ifdef INET6
|
2005-07-19 21:00:02 +04:00
|
|
|
in6p = NULL;
|
2005-07-20 12:05:43 +04:00
|
|
|
#else
|
|
|
|
;
|
|
|
|
#endif
|
2002-07-02 00:51:25 +04:00
|
|
|
#ifdef INET6
|
2005-07-19 21:00:02 +04:00
|
|
|
else if ((in6p = in6_pcblookup_connect(&tcbtable, &dst6,
|
|
|
|
th->th_dport, &src6, th->th_sport, 0)) != NULL)
|
2002-07-02 00:51:25 +04:00
|
|
|
;
|
|
|
|
#endif
|
|
|
|
else
|
2000-10-18 21:09:14 +04:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now that we've validated that we are actually communicating
|
|
|
|
* with the host indicated in the ICMP message, locate the
|
|
|
|
* ICMP header, recalculate the new MTU, and create the
|
|
|
|
* corresponding routing entry.
|
|
|
|
*/
|
2007-03-04 08:59:00 +03:00
|
|
|
icp = (struct icmp *)((char *)ip -
|
2000-10-18 21:09:14 +04:00
|
|
|
offsetof(struct icmp, icmp_ip));
|
2005-07-19 21:00:02 +04:00
|
|
|
if (inp) {
|
|
|
|
if ((tp = intotcpcb(inp)) == NULL)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p) {
|
|
|
|
if ((tp = in6totcpcb(in6p)) == NULL)
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
seq = ntohl(th->th_seq);
|
|
|
|
if (SEQ_LT(seq, tp->snd_una) || SEQ_GT(seq, tp->snd_max))
|
|
|
|
return NULL;
|
|
|
|
/*
|
|
|
|
* If the ICMP message advertises a Next-Hop MTU
|
|
|
|
* equal or larger than the maximum packet size we have
|
|
|
|
* ever sent, drop the message.
|
|
|
|
*/
|
|
|
|
mtu = (u_int)ntohs(icp->icmp_nextmtu);
|
|
|
|
if (mtu >= tp->t_pmtud_mtu_sent)
|
|
|
|
return NULL;
|
|
|
|
if (mtu >= tcp_hdrsz(tp) + tp->t_pmtud_mss_acked) {
|
|
|
|
/*
|
|
|
|
* Calculate new MTU, and create corresponding
|
|
|
|
* route (traditional PMTUD).
|
|
|
|
*/
|
|
|
|
tp->t_flags &= ~TF_PMTUD_PEND;
|
|
|
|
icmp_mtudisc(icp, ip->ip_dst);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Record the information got in the ICMP
|
|
|
|
* message; act on it later.
|
|
|
|
* If we had already recorded an ICMP message,
|
|
|
|
* replace the old one only if the new message
|
|
|
|
* refers to an older TCP segment
|
|
|
|
*/
|
|
|
|
if (tp->t_flags & TF_PMTUD_PEND) {
|
|
|
|
if (SEQ_LT(tp->t_pmtud_th_seq, seq))
|
|
|
|
return NULL;
|
|
|
|
} else
|
|
|
|
tp->t_flags |= TF_PMTUD_PEND;
|
|
|
|
tp->t_pmtud_th_seq = seq;
|
|
|
|
tp->t_pmtud_nextmtu = icp->icmp_nextmtu;
|
|
|
|
tp->t_pmtud_ip_len = icp->icmp_ip.ip_len;
|
|
|
|
tp->t_pmtud_ip_hl = icp->icmp_ip.ip_hl;
|
|
|
|
}
|
2000-10-18 21:09:14 +04:00
|
|
|
return NULL;
|
|
|
|
} else if (cmd == PRC_HOSTDEAD)
|
1995-06-12 10:24:21 +04:00
|
|
|
ip = 0;
|
1995-06-12 10:46:34 +04:00
|
|
|
else if (errno == 0)
|
1996-02-14 02:40:59 +03:00
|
|
|
return NULL;
|
1999-07-01 12:12:45 +04:00
|
|
|
if (ip && ip->ip_v == 4 && sa->sa_family == AF_INET) {
|
2007-03-04 08:59:00 +03:00
|
|
|
th = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
nmatch = in_pcbnotify(&tcbtable, satocsin(sa)->sin_addr,
|
1997-07-24 01:26:40 +04:00
|
|
|
th->th_dport, ip->ip_src, th->th_sport, errno, notify);
|
|
|
|
if (nmatch == 0 && syn_cache_count &&
|
|
|
|
(inetctlerrmap[cmd] == EHOSTUNREACH ||
|
|
|
|
inetctlerrmap[cmd] == ENETUNREACH ||
|
1999-07-01 12:12:45 +04:00
|
|
|
inetctlerrmap[cmd] == EHOSTDOWN)) {
|
|
|
|
struct sockaddr_in sin;
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&sin, 0, sizeof(sin));
|
1999-07-01 12:12:45 +04:00
|
|
|
sin.sin_len = sizeof(sin);
|
|
|
|
sin.sin_family = AF_INET;
|
|
|
|
sin.sin_port = th->th_sport;
|
|
|
|
sin.sin_addr = ip->ip_src;
|
|
|
|
syn_cache_unreach((struct sockaddr *)&sin, sa, th);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* XXX mapped address case */
|
2000-10-18 21:09:14 +04:00
|
|
|
} else
|
KNF: de-__P, bzero -> memset, bcmp -> memcmp. Remove extraneous
parentheses in return statements.
Cosmetic: don't open-code TAILQ_FOREACH().
Cosmetic: change types of variables to avoid oodles of casts: in
in6_src.c, avoid casts by changing several route_in6 pointers
to struct route pointers. Remove unnecessary casts to caddr_t
elsewhere.
Pave the way for eliminating address family-specific route caches:
soon, struct route will not embed a sockaddr, but it will hold
a reference to an external sockaddr, instead. We will set the
destination sockaddr using rtcache_setdst(). (I created a stub
for it, but it isn't used anywhere, yet.) rtcache_free() will
free the sockaddr. I have extracted from rtcache_free() a helper
subroutine, rtcache_clear(). rtcache_clear() will "forget" a
cached route, but it will not forget the destination by releasing
the sockaddr. I use rtcache_clear() instead of rtcache_free()
in rtcache_update(), because rtcache_update() is not supposed
to forget the destination.
Constify:
1 Introduce const accessor for route->ro_dst, rtcache_getdst().
2 Constify the 'dst' argument to ifnet->if_output(). This
led me to constify a lot of code called by output routines.
3 Constify the sockaddr argument to protosw->pr_ctlinput. This
led me to constify a lot of code called by ctlinput routines.
4 Introduce const macros for converting from a generic sockaddr
to family-specific sockaddrs, e.g., sockaddr_in: satocsin6,
satocsin, et cetera.
2007-02-18 01:34:07 +03:00
|
|
|
in_pcbnotifyall(&tcbtable, satocsin(sa)->sin_addr, errno,
|
1996-09-09 18:51:07 +04:00
|
|
|
notify);
|
1996-02-14 02:40:59 +03:00
|
|
|
return NULL;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2005-03-09 07:24:12 +03:00
|
|
|
* When a source quench is received, we are being notified of congestion.
|
1998-07-18 03:02:38 +04:00
|
|
|
* Close the congestion window down to the Loss Window (one segment).
|
|
|
|
* We will gradually open it again as we proceed.
|
1993-03-21 12:45:37 +03:00
|
|
|
*/
|
1994-01-09 02:07:16 +03:00
|
|
|
void
|
2006-11-16 04:32:37 +03:00
|
|
|
tcp_quench(struct inpcb *inp, int errno)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
|
|
|
struct tcpcb *tp = intotcpcb(inp);
|
|
|
|
|
2006-10-19 15:40:51 +04:00
|
|
|
if (tp) {
|
1998-07-18 03:02:38 +04:00
|
|
|
tp->snd_cwnd = tp->t_segsz;
|
2006-10-19 15:40:51 +04:00
|
|
|
tp->t_bytes_acked = 0;
|
|
|
|
}
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1997-09-23 01:49:55 +04:00
|
|
|
|
2000-10-20 00:22:59 +04:00
|
|
|
#ifdef INET6
|
1999-07-22 16:56:56 +04:00
|
|
|
void
|
2006-11-16 04:32:37 +03:00
|
|
|
tcp6_quench(struct in6pcb *in6p, int errno)
|
1999-07-22 16:56:56 +04:00
|
|
|
{
|
|
|
|
struct tcpcb *tp = in6totcpcb(in6p);
|
|
|
|
|
2006-10-19 15:40:51 +04:00
|
|
|
if (tp) {
|
1999-07-22 16:56:56 +04:00
|
|
|
tp->snd_cwnd = tp->t_segsz;
|
2006-10-19 15:40:51 +04:00
|
|
|
tp->t_bytes_acked = 0;
|
|
|
|
}
|
1999-07-22 16:56:56 +04:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2000-10-19 01:14:12 +04:00
|
|
|
#ifdef INET
|
2000-10-18 21:09:14 +04:00
|
|
|
/*
|
|
|
|
* Path MTU Discovery handlers.
|
|
|
|
*/
|
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_mtudisc_callback(struct in_addr faddr)
|
2000-10-18 21:09:14 +04:00
|
|
|
{
|
2002-07-02 00:51:25 +04:00
|
|
|
#ifdef INET6
|
|
|
|
struct in6_addr in6;
|
|
|
|
#endif
|
2000-10-18 21:09:14 +04:00
|
|
|
|
|
|
|
in_pcbnotifyall(&tcbtable, faddr, EMSGSIZE, tcp_mtudisc);
|
2002-07-02 00:51:25 +04:00
|
|
|
#ifdef INET6
|
|
|
|
memset(&in6, 0, sizeof(in6));
|
|
|
|
in6.s6_addr16[5] = 0xffff;
|
|
|
|
memcpy(&in6.s6_addr32[3], &faddr, sizeof(struct in_addr));
|
|
|
|
tcp6_mtudisc_callback(&in6);
|
|
|
|
#endif
|
2000-10-18 21:09:14 +04:00
|
|
|
}
|
|
|
|
|
1997-10-18 02:12:14 +04:00
|
|
|
/*
|
|
|
|
* On receipt of path MTU corrections, flush old route and replace it
|
|
|
|
* with the new one. Retransmit all unacknowledged packets, to ensure
|
|
|
|
* that all packets will be received.
|
|
|
|
*/
|
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_mtudisc(struct inpcb *inp, int errno)
|
1997-10-18 02:12:14 +04:00
|
|
|
{
|
|
|
|
struct tcpcb *tp = intotcpcb(inp);
|
|
|
|
struct rtentry *rt = in_pcbrtentry(inp);
|
|
|
|
|
|
|
|
if (tp != 0) {
|
|
|
|
if (rt != 0) {
|
1997-12-12 01:47:24 +03:00
|
|
|
/*
|
|
|
|
* If this was not a host route, remove and realloc.
|
|
|
|
*/
|
1997-10-18 02:12:14 +04:00
|
|
|
if ((rt->rt_flags & RTF_HOST) == 0) {
|
|
|
|
in_rtchange(inp, errno);
|
|
|
|
if ((rt = in_pcbrtentry(inp)) == 0)
|
|
|
|
return;
|
|
|
|
}
|
1997-12-12 01:47:24 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Slow start out of the error condition. We
|
|
|
|
* use the MTU because we know it's smaller
|
|
|
|
* than the previously transmitted segment.
|
1998-07-18 03:09:58 +04:00
|
|
|
*
|
|
|
|
* Note: This is more conservative than the
|
|
|
|
* suggestion in draft-floyd-incr-init-win-03.
|
1997-12-12 01:47:24 +03:00
|
|
|
*/
|
1997-11-08 05:35:22 +03:00
|
|
|
if (rt->rt_rmx.rmx_mtu != 0)
|
1997-12-12 01:47:24 +03:00
|
|
|
tp->snd_cwnd =
|
1998-04-01 02:49:09 +04:00
|
|
|
TCP_INITIAL_WINDOW(tcp_init_win,
|
|
|
|
rt->rt_rmx.rmx_mtu);
|
1997-10-18 02:12:14 +04:00
|
|
|
}
|
2002-06-09 20:33:36 +04:00
|
|
|
|
1997-12-12 01:47:24 +03:00
|
|
|
/*
|
|
|
|
* Resend unacknowledged packets.
|
|
|
|
*/
|
2008-03-27 03:18:56 +03:00
|
|
|
tp->snd_nxt = tp->sack_newdata = tp->snd_una;
|
1997-10-18 02:12:14 +04:00
|
|
|
tcp_output(tp);
|
|
|
|
}
|
|
|
|
}
|
2000-10-19 01:14:12 +04:00
|
|
|
#endif
|
1997-10-18 02:12:14 +04:00
|
|
|
|
2000-10-20 00:22:59 +04:00
|
|
|
#ifdef INET6
|
2000-10-19 01:14:12 +04:00
|
|
|
/*
|
|
|
|
* Path MTU Discovery handlers.
|
|
|
|
*/
|
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp6_mtudisc_callback(struct in6_addr *faddr)
|
2000-10-19 01:14:12 +04:00
|
|
|
{
|
|
|
|
struct sockaddr_in6 sin6;
|
|
|
|
|
2009-03-18 19:00:08 +03:00
|
|
|
memset(&sin6, 0, sizeof(sin6));
|
2000-10-19 01:14:12 +04:00
|
|
|
sin6.sin6_family = AF_INET6;
|
|
|
|
sin6.sin6_len = sizeof(struct sockaddr_in6);
|
|
|
|
sin6.sin6_addr = *faddr;
|
2003-09-04 13:16:57 +04:00
|
|
|
(void) in6_pcbnotify(&tcbtable, (struct sockaddr *)&sin6, 0,
|
2005-05-30 01:41:23 +04:00
|
|
|
(const struct sockaddr *)&sa6_any, 0, PRC_MSGSIZE, NULL, tcp6_mtudisc);
|
2000-10-19 01:14:12 +04:00
|
|
|
}
|
|
|
|
|
1999-07-22 16:56:56 +04:00
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp6_mtudisc(struct in6pcb *in6p, int errno)
|
1999-07-22 16:56:56 +04:00
|
|
|
{
|
|
|
|
struct tcpcb *tp = in6totcpcb(in6p);
|
|
|
|
struct rtentry *rt = in6_pcbrtentry(in6p);
|
|
|
|
|
|
|
|
if (tp != 0) {
|
|
|
|
if (rt != 0) {
|
|
|
|
/*
|
|
|
|
* If this was not a host route, remove and realloc.
|
|
|
|
*/
|
|
|
|
if ((rt->rt_flags & RTF_HOST) == 0) {
|
|
|
|
in6_rtchange(in6p, errno);
|
|
|
|
if ((rt = in6_pcbrtentry(in6p)) == 0)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Slow start out of the error condition. We
|
|
|
|
* use the MTU because we know it's smaller
|
|
|
|
* than the previously transmitted segment.
|
|
|
|
*
|
|
|
|
* Note: This is more conservative than the
|
|
|
|
* suggestion in draft-floyd-incr-init-win-03.
|
|
|
|
*/
|
|
|
|
if (rt->rt_rmx.rmx_mtu != 0)
|
|
|
|
tp->snd_cwnd =
|
|
|
|
TCP_INITIAL_WINDOW(tcp_init_win,
|
|
|
|
rt->rt_rmx.rmx_mtu);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Resend unacknowledged packets.
|
|
|
|
*/
|
2008-03-27 03:18:56 +03:00
|
|
|
tp->snd_nxt = tp->sack_newdata = tp->snd_una;
|
1999-07-22 16:56:56 +04:00
|
|
|
tcp_output(tp);
|
|
|
|
}
|
|
|
|
}
|
2000-10-20 00:22:59 +04:00
|
|
|
#endif /* INET6 */
|
1997-10-18 02:12:14 +04:00
|
|
|
|
1997-09-23 01:49:55 +04:00
|
|
|
/*
|
|
|
|
* Compute the MSS to advertise to the peer. Called only during
|
|
|
|
* the 3-way handshake. If we are the server (peer initiated
|
1998-05-08 02:30:23 +04:00
|
|
|
* connection), we are called with a pointer to the interface
|
2002-06-09 20:33:36 +04:00
|
|
|
* on which the SYN packet arrived. If we are the client (we
|
1998-05-08 02:30:23 +04:00
|
|
|
* initiated connection), we are called with a pointer to the
|
|
|
|
* interface out which this connection should go.
|
1999-09-23 06:21:30 +04:00
|
|
|
*
|
|
|
|
* NOTE: Do not subtract IP option/extension header size nor IPsec
|
|
|
|
* header size from MSS advertisement. MSS option must hold the maximum
|
|
|
|
* segment size we can accept, so it must always be:
|
|
|
|
* max(if mtu) - ip header - tcp header
|
1997-09-23 01:49:55 +04:00
|
|
|
*/
|
1998-04-14 01:18:19 +04:00
|
|
|
u_long
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_mss_to_advertise(const struct ifnet *ifp, int af)
|
1997-09-23 01:49:55 +04:00
|
|
|
{
|
|
|
|
extern u_long in_maxmtu;
|
1998-04-14 01:18:19 +04:00
|
|
|
u_long mss = 0;
|
1999-09-23 06:21:30 +04:00
|
|
|
u_long hdrsiz;
|
1997-09-23 01:49:55 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* In order to avoid defeating path MTU discovery on the peer,
|
|
|
|
* we advertise the max MTU of all attached networks as our MSS,
|
|
|
|
* per RFC 1191, section 3.1.
|
1998-04-14 01:18:19 +04:00
|
|
|
*
|
|
|
|
* We provide the option to advertise just the MTU of
|
|
|
|
* the interface on which we hope this connection will
|
|
|
|
* be receiving. If we are responding to a SYN, we
|
|
|
|
* will have a pretty good idea about this, but when
|
|
|
|
* initiating a connection there is a bit more doubt.
|
|
|
|
*
|
|
|
|
* We also need to ensure that loopback has a large enough
|
|
|
|
* MSS, as the loopback MTU is never included in in_maxmtu.
|
1997-09-23 01:49:55 +04:00
|
|
|
*/
|
|
|
|
|
1998-04-14 01:18:19 +04:00
|
|
|
if (ifp != NULL)
|
2002-05-29 11:53:39 +04:00
|
|
|
switch (af) {
|
|
|
|
case AF_INET:
|
|
|
|
mss = ifp->if_mtu;
|
|
|
|
break;
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
mss = IN6_LINKMTU(ifp);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
}
|
1998-04-14 01:18:19 +04:00
|
|
|
|
|
|
|
if (tcp_mss_ifmtu == 0)
|
2001-07-23 19:17:58 +04:00
|
|
|
switch (af) {
|
|
|
|
case AF_INET:
|
|
|
|
mss = max(in_maxmtu, mss);
|
|
|
|
break;
|
2001-07-23 19:20:41 +04:00
|
|
|
#ifdef INET6
|
2001-07-23 19:17:58 +04:00
|
|
|
case AF_INET6:
|
|
|
|
mss = max(in6_maxmtu, mss);
|
|
|
|
break;
|
2001-07-23 19:20:41 +04:00
|
|
|
#endif
|
2001-07-23 19:17:58 +04:00
|
|
|
}
|
1998-04-14 01:18:19 +04:00
|
|
|
|
1999-09-23 06:21:30 +04:00
|
|
|
switch (af) {
|
|
|
|
case AF_INET:
|
|
|
|
hdrsiz = sizeof(struct ip);
|
|
|
|
break;
|
1999-09-23 08:02:27 +04:00
|
|
|
#ifdef INET6
|
1999-09-23 06:21:30 +04:00
|
|
|
case AF_INET6:
|
|
|
|
hdrsiz = sizeof(struct ip6_hdr);
|
|
|
|
break;
|
1999-09-23 08:02:27 +04:00
|
|
|
#endif
|
1999-09-23 06:21:30 +04:00
|
|
|
default:
|
|
|
|
hdrsiz = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
hdrsiz += sizeof(struct tcphdr);
|
|
|
|
if (mss > hdrsiz)
|
|
|
|
mss -= hdrsiz;
|
1998-04-14 01:18:19 +04:00
|
|
|
|
|
|
|
mss = max(tcp_mssdflt, mss);
|
1997-09-23 01:49:55 +04:00
|
|
|
return (mss);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set connection variables based on the peer's advertised MSS.
|
|
|
|
* We are passed the TCPCB for the actual connection. If we
|
|
|
|
* are the server, we are called by the compressed state engine
|
|
|
|
* when the 3-way handshake is complete. If we are the client,
|
2001-06-12 19:17:10 +04:00
|
|
|
* we are called when we receive the SYN,ACK from the server.
|
1997-09-23 01:49:55 +04:00
|
|
|
*
|
|
|
|
* NOTE: Our advertised MSS value must be initialized in the TCPCB
|
|
|
|
* before this routine is called!
|
|
|
|
*/
|
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_mss_from_peer(struct tcpcb *tp, int offer)
|
1997-09-23 01:49:55 +04:00
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
struct socket *so;
|
1997-09-23 01:49:55 +04:00
|
|
|
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
|
1999-07-01 12:12:45 +04:00
|
|
|
struct rtentry *rt;
|
1997-09-23 01:49:55 +04:00
|
|
|
#endif
|
|
|
|
u_long bufsize;
|
|
|
|
int mss;
|
|
|
|
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (tp->t_inpcb && tp->t_in6pcb)
|
|
|
|
panic("tcp_mss_from_peer: both t_inpcb and t_in6pcb are set");
|
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
so = NULL;
|
|
|
|
rt = NULL;
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp->t_inpcb) {
|
|
|
|
so = tp->t_inpcb->inp_socket;
|
|
|
|
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
|
|
|
|
rt = in_pcbrtentry(tp->t_inpcb);
|
|
|
|
#endif
|
|
|
|
}
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-10-17 07:06:42 +04:00
|
|
|
if (tp->t_in6pcb) {
|
1999-07-01 12:12:45 +04:00
|
|
|
so = tp->t_in6pcb->in6p_socket;
|
|
|
|
#if defined(RTV_SPIPE) || defined(RTV_SSTHRESH)
|
|
|
|
rt = in6_pcbrtentry(tp->t_in6pcb);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1997-09-23 01:49:55 +04:00
|
|
|
/*
|
2002-06-09 20:33:36 +04:00
|
|
|
* As per RFC1122, use the default MSS value, unless they
|
2004-01-07 22:15:43 +03:00
|
|
|
* sent us an offer. Do not accept offers less than 256 bytes.
|
1997-09-23 01:49:55 +04:00
|
|
|
*/
|
1998-03-18 02:50:30 +03:00
|
|
|
mss = tcp_mssdflt;
|
1997-09-23 01:49:55 +04:00
|
|
|
if (offer)
|
|
|
|
mss = offer;
|
2004-01-07 22:15:43 +03:00
|
|
|
mss = max(mss, 256); /* sanity */
|
1998-05-13 01:45:51 +04:00
|
|
|
tp->t_peermss = mss;
|
1999-07-01 12:12:45 +04:00
|
|
|
mss -= tcp_optlen(tp);
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp->t_inpcb)
|
|
|
|
mss -= ip_optlen(tp->t_inpcb);
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-10-17 07:06:42 +04:00
|
|
|
if (tp->t_in6pcb)
|
1999-07-01 12:12:45 +04:00
|
|
|
mss -= ip6_optlen(tp->t_in6pcb);
|
|
|
|
#endif
|
1997-09-23 01:49:55 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If there's a pipesize, change the socket buffer to that size.
|
|
|
|
* Make the socket buffer an integral number of MSS units. If
|
|
|
|
* the MSS is larger than the socket buffer, artificially decrease
|
|
|
|
* the MSS.
|
|
|
|
*/
|
|
|
|
#ifdef RTV_SPIPE
|
|
|
|
if (rt != NULL && rt->rt_rmx.rmx_sendpipe != 0)
|
|
|
|
bufsize = rt->rt_rmx.rmx_sendpipe;
|
|
|
|
else
|
|
|
|
#endif
|
2006-04-15 06:30:39 +04:00
|
|
|
{
|
|
|
|
KASSERT(so != NULL);
|
1997-09-23 01:49:55 +04:00
|
|
|
bufsize = so->so_snd.sb_hiwat;
|
2006-04-15 06:30:39 +04:00
|
|
|
}
|
1997-09-23 01:49:55 +04:00
|
|
|
if (bufsize < mss)
|
|
|
|
mss = bufsize;
|
|
|
|
else {
|
|
|
|
bufsize = roundup(bufsize, mss);
|
|
|
|
if (bufsize > sb_max)
|
|
|
|
bufsize = sb_max;
|
2004-04-17 19:18:53 +04:00
|
|
|
(void) sbreserve(&so->so_snd, bufsize, so);
|
1997-09-23 01:49:55 +04:00
|
|
|
}
|
1997-11-08 05:35:22 +03:00
|
|
|
tp->t_segsz = mss;
|
1997-09-23 01:49:55 +04:00
|
|
|
|
|
|
|
#ifdef RTV_SSTHRESH
|
|
|
|
if (rt != NULL && rt->rt_rmx.rmx_ssthresh) {
|
|
|
|
/*
|
|
|
|
* There's some sort of gateway or interface buffer
|
|
|
|
* limit on the path. Use this to set the slow
|
|
|
|
* start threshold, but set the threshold to no less
|
|
|
|
* than 2 * MSS.
|
|
|
|
*/
|
|
|
|
tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Processing necessary when a TCP connection is established.
|
|
|
|
*/
|
|
|
|
void
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_established(struct tcpcb *tp)
|
1997-09-23 01:49:55 +04:00
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
struct socket *so;
|
1997-09-23 01:49:55 +04:00
|
|
|
#ifdef RTV_RPIPE
|
1999-07-01 12:12:45 +04:00
|
|
|
struct rtentry *rt;
|
1997-09-23 01:49:55 +04:00
|
|
|
#endif
|
|
|
|
u_long bufsize;
|
|
|
|
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (tp->t_inpcb && tp->t_in6pcb)
|
|
|
|
panic("tcp_established: both t_inpcb and t_in6pcb are set");
|
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
so = NULL;
|
|
|
|
rt = NULL;
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
if (tp->t_inpcb) {
|
|
|
|
so = tp->t_inpcb->inp_socket;
|
|
|
|
#if defined(RTV_RPIPE)
|
|
|
|
rt = in_pcbrtentry(tp->t_inpcb);
|
|
|
|
#endif
|
|
|
|
}
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-10-17 07:06:42 +04:00
|
|
|
if (tp->t_in6pcb) {
|
1999-07-01 12:12:45 +04:00
|
|
|
so = tp->t_in6pcb->in6p_socket;
|
|
|
|
#if defined(RTV_RPIPE)
|
|
|
|
rt = in6_pcbrtentry(tp->t_in6pcb);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1997-09-23 01:49:55 +04:00
|
|
|
tp->t_state = TCPS_ESTABLISHED;
|
2007-06-20 19:29:17 +04:00
|
|
|
TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
|
1997-09-23 01:49:55 +04:00
|
|
|
|
|
|
|
#ifdef RTV_RPIPE
|
|
|
|
if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0)
|
|
|
|
bufsize = rt->rt_rmx.rmx_recvpipe;
|
|
|
|
else
|
|
|
|
#endif
|
2006-04-15 06:29:12 +04:00
|
|
|
{
|
|
|
|
KASSERT(so != NULL);
|
1997-09-23 01:49:55 +04:00
|
|
|
bufsize = so->so_rcv.sb_hiwat;
|
2006-04-15 06:29:12 +04:00
|
|
|
}
|
1997-09-23 01:49:55 +04:00
|
|
|
if (bufsize > tp->t_ourmss) {
|
|
|
|
bufsize = roundup(bufsize, tp->t_ourmss);
|
|
|
|
if (bufsize > sb_max)
|
|
|
|
bufsize = sb_max;
|
2004-04-17 19:18:53 +04:00
|
|
|
(void) sbreserve(&so->so_rcv, bufsize, so);
|
1997-09-23 01:49:55 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Check if there's an initial rtt or rttvar.  Convert from the
 * route-table units to scaled multiples of the slow timeout timer.
 * Called only during the 3-way handshake.
 *
 * Does nothing unless RTV_RTT is defined, a route can be found for
 * the connection, the connection has no smoothed RTT yet
 * (tp->t_srtt == 0) and the route carries a non-zero rmx_rtt.
 */
void
tcp_rmx_rtt(struct tcpcb *tp)
{
#ifdef RTV_RTT
	struct rtentry *route = NULL;
	int rtt;

#ifdef DIAGNOSTIC
	if (tp->t_inpcb && tp->t_in6pcb)
		panic("tcp_rmx_rtt: both t_inpcb and t_in6pcb are set");
#endif
#ifdef INET
	if (tp->t_inpcb)
		route = in_pcbrtentry(tp->t_inpcb);
#endif
#ifdef INET6
	if (tp->t_in6pcb)
		route = in6_pcbrtentry(tp->t_in6pcb);
#endif
	if (route == NULL)
		return;

	/* Only seed a connection that has no RTT estimate yet. */
	if (tp->t_srtt != 0)
		return;
	rtt = route->rt_rmx.rmx_rtt;
	if (rtt == 0)
		return;

	/*
	 * XXX The lock bit for MTU indicates that the value
	 * is also a minimum value; this is subject to time.
	 */
	if (route->rt_rmx.rmx_locks & RTV_RTT) {
		TCPT_RANGESET(tp->t_rttmin,
		    rtt / (RTM_RTTUNIT / PR_SLOWHZ),
		    TCPTV_MIN, TCPTV_REXMTMAX);
	}

	tp->t_srtt = rtt /
	    ((RTM_RTTUNIT / PR_SLOWHZ) >> (TCP_RTT_SHIFT + 2));

	if (route->rt_rmx.rmx_rttvar == 0) {
		/* Default variation is +- 1 rtt */
		tp->t_rttvar =
		    tp->t_srtt >> (TCP_RTT_SHIFT - TCP_RTTVAR_SHIFT);
	} else {
		tp->t_rttvar = route->rt_rmx.rmx_rttvar /
		    ((RTM_RTTUNIT / PR_SLOWHZ) >>
		    (TCP_RTTVAR_SHIFT + 2));
	}

	/* Derive the retransmit timeout from the seeded estimates. */
	TCPT_RANGESET(tp->t_rxtcur,
	    ((tp->t_srtt >> 2) + tp->t_rttvar) >> (1 + 2),
	    tp->t_rttmin, TCPTV_REXMTMAX);
#endif
}
|
1997-10-10 05:51:07 +04:00
|
|
|
|
1997-10-13 04:46:08 +04:00
|
|
|
tcp_seq tcp_iss_seq = 0; /* TCP initial sequence number counter; advanced by TCP_ISSINCR in tcp_new_iss1() */
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
#if NRND > 0
/*
 * Secret fed into the MD5 hash that tcp_new_iss1() computes over the
 * connection addresses and ports.  128 bits; should be plenty.
 */
u_int8_t tcp_iss_secret[16];
#endif
|
1997-10-13 04:46:08 +04:00
|
|
|
|
1997-10-10 05:51:07 +04:00
|
|
|
/*
|
|
|
|
* Get a new sequence value given a tcp control block
|
|
|
|
*/
|
|
|
|
tcp_seq
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
tcp_new_iss(struct tcpcb *tp, tcp_seq addin)
|
|
|
|
{
|
|
|
|
|
|
|
|
#ifdef INET
|
|
|
|
if (tp->t_inpcb != NULL) {
|
|
|
|
return (tcp_new_iss1(&tp->t_inpcb->inp_laddr,
|
|
|
|
&tp->t_inpcb->inp_faddr, tp->t_inpcb->inp_lport,
|
|
|
|
tp->t_inpcb->inp_fport, sizeof(tp->t_inpcb->inp_laddr),
|
|
|
|
addin));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
if (tp->t_in6pcb != NULL) {
|
|
|
|
return (tcp_new_iss1(&tp->t_in6pcb->in6p_laddr,
|
|
|
|
&tp->t_in6pcb->in6p_faddr, tp->t_in6pcb->in6p_lport,
|
|
|
|
tp->t_in6pcb->in6p_fport, sizeof(tp->t_in6pcb->in6p_laddr),
|
|
|
|
addin));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/* Not possible. */
|
|
|
|
panic("tcp_new_iss");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This routine actually generates a new TCP initial sequence number.
|
|
|
|
*/
|
|
|
|
tcp_seq
|
|
|
|
tcp_new_iss1(void *laddr, void *faddr, u_int16_t lport, u_int16_t fport,
|
|
|
|
size_t addrsz, tcp_seq addin)
|
1997-10-10 05:51:07 +04:00
|
|
|
{
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
tcp_seq tcp_iss;
|
1997-10-10 05:51:07 +04:00
|
|
|
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
#if NRND > 0
|
2008-02-29 10:39:17 +03:00
|
|
|
static bool tcp_iss_gotten_secret;
|
2001-03-21 06:35:11 +03:00
|
|
|
|
1997-10-10 05:51:07 +04:00
|
|
|
/*
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
* If we haven't been here before, initialize our cryptographic
|
|
|
|
* hash secret.
|
1997-10-10 05:51:07 +04:00
|
|
|
*/
|
2008-02-29 10:39:17 +03:00
|
|
|
if (tcp_iss_gotten_secret == false) {
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
rnd_extract_data(tcp_iss_secret, sizeof(tcp_iss_secret),
|
|
|
|
RND_EXTRACT_ANY);
|
2008-02-29 10:39:17 +03:00
|
|
|
tcp_iss_gotten_secret = true;
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (tcp_do_rfc1948) {
|
|
|
|
MD5_CTX ctx;
|
|
|
|
u_int8_t hash[16]; /* XXX MD5 knowledge */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute the base value of the ISS. It is a hash
|
|
|
|
* of (saddr, sport, daddr, dport, secret).
|
|
|
|
*/
|
|
|
|
MD5Init(&ctx);
|
|
|
|
|
|
|
|
MD5Update(&ctx, (u_char *) laddr, addrsz);
|
|
|
|
MD5Update(&ctx, (u_char *) &lport, sizeof(lport));
|
|
|
|
|
|
|
|
MD5Update(&ctx, (u_char *) faddr, addrsz);
|
|
|
|
MD5Update(&ctx, (u_char *) &fport, sizeof(fport));
|
|
|
|
|
|
|
|
MD5Update(&ctx, tcp_iss_secret, sizeof(tcp_iss_secret));
|
|
|
|
|
|
|
|
MD5Final(hash, &ctx);
|
|
|
|
|
|
|
|
memcpy(&tcp_iss, hash, sizeof(tcp_iss));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now increment our "timer", and add it in to
|
|
|
|
* the computed value.
|
|
|
|
*
|
|
|
|
* XXX Use `addin'?
|
|
|
|
* XXX TCP_ISSINCR too large to use?
|
|
|
|
*/
|
|
|
|
tcp_iss_seq += TCP_ISSINCR;
|
|
|
|
#ifdef TCPISS_DEBUG
|
|
|
|
printf("ISS hash 0x%08x, ", tcp_iss);
|
|
|
|
#endif
|
|
|
|
tcp_iss += tcp_iss_seq + addin;
|
|
|
|
#ifdef TCPISS_DEBUG
|
|
|
|
printf("new ISS 0x%08x\n", tcp_iss);
|
|
|
|
#endif
|
|
|
|
} else
|
|
|
|
#endif /* NRND > 0 */
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Randomize.
|
|
|
|
*/
|
1997-10-13 04:46:08 +04:00
|
|
|
#if NRND > 0
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
rnd_extract_data(&tcp_iss, sizeof(tcp_iss), RND_EXTRACT_ANY);
|
1997-10-13 04:46:08 +04:00
|
|
|
#else
|
2002-05-28 14:17:27 +04:00
|
|
|
tcp_iss = arc4random();
|
1997-10-13 04:46:08 +04:00
|
|
|
#endif
|
1997-10-10 05:51:07 +04:00
|
|
|
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
/*
|
|
|
|
* If we were asked to add some amount to a known value,
|
|
|
|
* we will take a random value obtained above, mask off
|
|
|
|
* the upper bits, and add in the known value. We also
|
|
|
|
* add in a constant to ensure that we are at least a
|
|
|
|
* certain distance from the original value.
|
|
|
|
*
|
|
|
|
* This is used when an old connection is in timed wait
|
|
|
|
* and we have a new one coming in, for instance.
|
|
|
|
*/
|
|
|
|
if (addin != 0) {
|
1997-10-10 05:51:07 +04:00
|
|
|
#ifdef TCPISS_DEBUG
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
printf("Random %08x, ", tcp_iss);
|
1997-10-10 05:51:07 +04:00
|
|
|
#endif
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
tcp_iss &= TCP_ISS_RANDOM_MASK;
|
|
|
|
tcp_iss += addin + TCP_ISSINCR;
|
1997-10-10 05:51:07 +04:00
|
|
|
#ifdef TCPISS_DEBUG
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
printf("Old ISS %08x, ISS %08x\n", addin, tcp_iss);
|
1997-10-10 05:51:07 +04:00
|
|
|
#endif
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
} else {
|
|
|
|
tcp_iss &= TCP_ISS_RANDOM_MASK;
|
|
|
|
tcp_iss += tcp_iss_seq;
|
|
|
|
tcp_iss_seq += TCP_ISSINCR;
|
1997-10-10 05:51:07 +04:00
|
|
|
#ifdef TCPISS_DEBUG
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
printf("ISS %08x\n", tcp_iss);
|
1997-10-10 05:51:07 +04:00
|
|
|
#endif
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
}
|
1997-10-10 05:51:07 +04:00
|
|
|
}
|
|
|
|
|
1998-04-29 09:16:46 +04:00
|
|
|
if (tcp_compat_42) {
|
|
|
|
/*
|
|
|
|
* Limit it to the positive range for really old TCP
|
|
|
|
* implementations.
|
2002-10-22 11:22:19 +04:00
|
|
|
* Just AND off the top bit instead of checking if
|
2002-10-22 06:53:59 +04:00
|
|
|
* it is set first - saves a branch 50% of the time.
|
1998-04-29 09:16:46 +04:00
|
|
|
*/
|
2002-10-22 06:53:59 +04:00
|
|
|
tcp_iss &= 0x7fffffff; /* XXX */
|
1998-04-29 09:16:46 +04:00
|
|
|
}
|
1997-10-10 05:51:07 +04:00
|
|
|
|
Two changes, designed to make us even more resilient against TCP
ISS attacks (which we already fend off quite well).
1. First-cut implementation of RFC1948, Steve Bellovin's cryptographic
hash method of generating TCP ISS values. Note, this code is experimental
and disabled by default (experimental enough that I don't export the
variable via sysctl yet, either). There are a couple of issues I'd
like to discuss with Steve, so this code should only be used by people
who really know what they're doing.
2. Per a recent thread on Bugtraq, it's possible to determine a system's
uptime by snooping the RFC1323 TCP timestamp options sent by a host; in
4.4BSD, timestamps are created by incrementing the tcp_now variable
at 2 Hz; there's even a company out there that uses this to determine
web server uptime. According to Newsham's paper "The Problem With
Random Increments", while NetBSD's TCP ISS generation method is much
better than the "random increment" method used by FreeBSD and OpenBSD,
it is still theoretically possible to mount an attack against NetBSD's
method if the attacker knows how many times the tcp_iss_seq variable
has been incremented. By not leaking uptime information, we can make
that much harder to determine. So, we avoid the leak by giving each
TCP connection a timebase of 0.
2001-03-20 23:07:51 +03:00
|
|
|
return (tcp_iss);
|
1997-10-10 05:51:07 +04:00
|
|
|
}
|
1998-03-18 02:50:30 +03:00
|
|
|
|
2003-08-15 07:42:00 +04:00
|
|
|
#if defined(IPSEC) || defined(FAST_IPSEC)
|
1999-07-01 12:12:45 +04:00
|
|
|
/* compute ESP/AH header size for TCP, including outer IP header. */
|
|
|
|
size_t
|
2005-02-04 02:50:33 +03:00
|
|
|
ipsec4_hdrsiz_tcp(struct tcpcb *tp)
|
1999-07-01 12:12:45 +04:00
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
|
|
|
size_t hdrsiz;
|
|
|
|
|
|
|
|
/* XXX mapped addr case (tp->t_in6pcb) */
|
|
|
|
if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
|
|
|
|
return 0;
|
|
|
|
switch (tp->t_family) {
|
|
|
|
case AF_INET:
|
2000-01-31 17:18:52 +03:00
|
|
|
/* XXX: should use currect direction. */
|
|
|
|
hdrsiz = ipsec4_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, inp);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
hdrsiz = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return hdrsiz;
|
|
|
|
}
|
|
|
|
|
2000-10-20 00:22:59 +04:00
|
|
|
#ifdef INET6
|
1999-07-01 12:12:45 +04:00
|
|
|
size_t
|
2005-02-04 02:50:33 +03:00
|
|
|
ipsec6_hdrsiz_tcp(struct tcpcb *tp)
|
1999-07-01 12:12:45 +04:00
|
|
|
{
|
|
|
|
struct in6pcb *in6p;
|
|
|
|
size_t hdrsiz;
|
|
|
|
|
|
|
|
if (!tp || !tp->t_template || !(in6p = tp->t_in6pcb))
|
|
|
|
return 0;
|
|
|
|
switch (tp->t_family) {
|
|
|
|
case AF_INET6:
|
2000-01-31 17:18:52 +03:00
|
|
|
/* XXX: should use currect direction. */
|
|
|
|
hdrsiz = ipsec6_hdrsiz(tp->t_template, IPSEC_DIR_OUTBOUND, in6p);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
case AF_INET:
|
|
|
|
/* mapped address case - tricky */
|
|
|
|
default:
|
|
|
|
hdrsiz = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return hdrsiz;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif /*IPSEC*/
|
1998-03-18 02:50:30 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine the length of the TCP options for this connection.
|
2002-06-09 20:33:36 +04:00
|
|
|
*
|
1998-03-18 02:50:30 +03:00
|
|
|
* XXX: What do we do for SACK, when we add that? Just reserve
|
|
|
|
* all of the space? Otherwise we can't exactly be incrementing
|
|
|
|
* cwnd by an amount that varies depending on the amount we last
|
|
|
|
* had to SACK!
|
|
|
|
*/
|
|
|
|
|
|
|
|
u_int
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_optlen(struct tcpcb *tp)
|
1998-03-18 02:50:30 +03:00
|
|
|
{
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do see any code
that verifies recieved TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
u_int optlen;
|
|
|
|
|
|
|
|
optlen = 0;
|
2002-06-09 20:33:36 +04:00
|
|
|
if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
|
1998-03-18 02:50:30 +03:00
|
|
|
(TF_REQ_TSTMP | TF_RCVD_TSTMP))
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do see any code
that verifies recieved TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
optlen += TCPOLEN_TSTAMP_APPA;
|
|
|
|
|
|
|
|
#ifdef TCP_SIGNATURE
|
|
|
|
if (tp->t_flags & TF_SIGNATURE)
|
|
|
|
optlen += TCPOLEN_SIGNATURE + 2;
|
|
|
|
#endif /* TCP_SIGNATURE */
|
|
|
|
|
|
|
|
return optlen;
|
1998-03-18 02:50:30 +03:00
|
|
|
}
|
2005-07-19 21:00:02 +04:00
|
|
|
|
|
|
|
u_int
|
|
|
|
tcp_hdrsz(struct tcpcb *tp)
|
|
|
|
{
|
|
|
|
u_int hlen;
|
|
|
|
|
|
|
|
switch (tp->t_family) {
|
|
|
|
#ifdef INET6
|
|
|
|
case AF_INET6:
|
|
|
|
hlen = sizeof(struct ip6_hdr);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
case AF_INET:
|
|
|
|
hlen = sizeof(struct ip);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
hlen = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
hlen += sizeof(struct tcphdr);
|
|
|
|
|
|
|
|
if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
|
|
|
|
(tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
|
|
|
|
hlen += TCPOLEN_TSTAMP_APPA;
|
|
|
|
#ifdef TCP_SIGNATURE
|
|
|
|
if (tp->t_flags & TF_SIGNATURE)
|
|
|
|
hlen += TCPOLEN_SIGLEN;
|
|
|
|
#endif
|
|
|
|
return hlen;
|
|
|
|
}
|
2008-04-12 09:58:22 +04:00
|
|
|
|
|
|
|
void
|
|
|
|
tcp_statinc(u_int stat)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(stat < TCP_NSTATS);
|
|
|
|
TCP_STATINC(stat);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
tcp_statadd(u_int stat, uint64_t val)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(stat < TCP_NSTATS);
|
|
|
|
TCP_STATADD(stat, val);
|
|
|
|
}
|