2015-08-25 01:21:26 +03:00
|
|
|
/* $NetBSD: tcp_usrreq.c,v 1.209 2015/08/24 22:21:26 pooka Exp $ */
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
|
|
|
* All rights reserved.
|
2002-06-09 20:33:36 +04:00
|
|
|
*
|
1999-07-01 12:12:45 +04:00
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the project nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
2002-06-09 20:33:36 +04:00
|
|
|
*
|
1999-07-01 12:12:45 +04:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
1998-02-19 05:36:42 +03:00
|
|
|
|
|
|
|
/*-
|
2006-09-05 04:29:35 +04:00
|
|
|
* Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc.
|
1998-02-19 05:36:42 +03:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation
|
|
|
|
* Facility, NASA Ames Research Center.
|
2005-03-02 13:20:18 +03:00
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Charles M. Hannum.
|
2006-09-05 04:29:35 +04:00
|
|
|
* This code is derived from software contributed to The NetBSD Foundation
|
|
|
|
* by Rui Paulo.
|
1998-02-19 05:36:42 +03:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
1994-06-29 10:29:24 +04:00
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
1998-01-05 13:31:44 +03:00
|
|
|
* Copyright (c) 1982, 1986, 1988, 1993, 1995
|
1994-05-13 10:02:48 +04:00
|
|
|
* The Regents of the University of California. All rights reserved.
|
1993-03-21 12:45:37 +03:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2003-08-07 20:26:28 +04:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1993-03-21 12:45:37 +03:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1998-01-05 13:31:44 +03:00
|
|
|
* @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95
|
1993-03-21 12:45:37 +03:00
|
|
|
*/
|
|
|
|
|
2014-05-19 06:51:24 +04:00
|
|
|
/*
|
|
|
|
* TCP protocol interface to socket abstraction.
|
|
|
|
*/
|
|
|
|
|
2001-11-13 03:32:34 +03:00
|
|
|
#include <sys/cdefs.h>
|
2015-08-25 01:21:26 +03:00
|
|
|
__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.209 2015/08/24 22:21:26 pooka Exp $");
|
2001-11-13 03:32:34 +03:00
|
|
|
|
2015-08-25 01:21:26 +03:00
|
|
|
#ifdef _KERNEL_OPT
|
1999-07-01 12:12:45 +04:00
|
|
|
#include "opt_inet.h"
|
2001-07-08 20:18:56 +04:00
|
|
|
#include "opt_tcp_debug.h"
|
2003-06-23 15:00:59 +04:00
|
|
|
#include "opt_mbuftrace.h"
|
2015-08-25 01:21:26 +03:00
|
|
|
#include "opt_tcp_space.h"
|
|
|
|
#endif
|
2012-02-02 23:42:57 +04:00
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
1995-03-21 10:48:14 +03:00
|
|
|
#include <sys/kernel.h>
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
|
|
|
#include <sys/protosw.h>
|
|
|
|
#include <sys/errno.h>
|
|
|
|
#include <sys/stat.h>
|
1996-02-14 02:40:59 +03:00
|
|
|
#include <sys/proc.h>
|
1999-07-01 12:12:45 +04:00
|
|
|
#include <sys/domain.h>
|
1996-02-14 02:40:59 +03:00
|
|
|
#include <sys/sysctl.h>
|
2006-05-15 01:19:33 +04:00
|
|
|
#include <sys/kauth.h>
|
2015-02-14 15:57:52 +03:00
|
|
|
#include <sys/kernel.h>
|
2008-10-11 17:40:57 +04:00
|
|
|
#include <sys/uidinfo.h>
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/route.h>
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_systm.h>
|
1995-04-13 10:35:38 +04:00
|
|
|
#include <netinet/in_var.h>
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/ip.h>
|
|
|
|
#include <netinet/in_pcb.h>
|
|
|
|
#include <netinet/ip_var.h>
|
2005-08-10 17:06:49 +04:00
|
|
|
#include <netinet/in_offload.h>
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
#ifdef INET6
|
|
|
|
#ifndef INET
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#endif
|
|
|
|
#include <netinet/ip6.h>
|
|
|
|
#include <netinet6/in6_pcb.h>
|
|
|
|
#include <netinet6/ip6_var.h>
|
2007-06-29 01:11:12 +04:00
|
|
|
#include <netinet6/scope6_var.h>
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif
|
|
|
|
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <netinet/tcp_fsm.h>
|
|
|
|
#include <netinet/tcp_seq.h>
|
|
|
|
#include <netinet/tcp_timer.h>
|
|
|
|
#include <netinet/tcp_var.h>
|
2008-04-12 09:58:22 +04:00
|
|
|
#include <netinet/tcp_private.h>
|
2006-10-09 20:27:07 +04:00
|
|
|
#include <netinet/tcp_congctl.h>
|
1993-12-18 03:40:47 +03:00
|
|
|
#include <netinet/tcpip.h>
|
|
|
|
#include <netinet/tcp_debug.h>
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable come from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
#include <netinet/tcp_vtw.h>
|
1993-03-21 12:45:37 +03:00
|
|
|
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
/*
 * tcp_debug_capture: bookkeeping done on entry to a user request.
 *
 * When built with KPROF, bump the tcp_acounts[] counter selected by the
 * connection's current state and the request code.
 *
 * Returns the connection's current TCP state when built with TCP_DEBUG
 * (presumably kept by the caller as the "ostate" later passed to
 * tcp_debug_trace() -- confirm against callers), and 0 otherwise.
 */
static int
tcp_debug_capture(struct tcpcb *tp, int req)
{
#ifdef KPROF
	tcp_acounts[tp->t_state][req]++;
#endif

#ifdef TCP_DEBUG
	return tp->t_state;
#else
	return 0;
#endif
}
|
|
|
|
|
|
|
|
/*
 * tcp_debug_trace: log a user request through tcp_trace().
 *
 * Compiles to an empty function unless TCP_DEBUG is defined.  With
 * TCP_DEBUG, a TA_USER trace record carrying the given old state and
 * request code is emitted, but only if a tcpcb was supplied and the
 * socket has SO_DEBUG set.
 */
static inline void
tcp_debug_trace(struct socket *so, struct tcpcb *tp, int ostate, int req)
{
#ifdef TCP_DEBUG
	/* Nothing to trace without a control block. */
	if (tp == NULL)
		return;

	if ((so->so_options & SO_DEBUG) != 0)
		tcp_trace(TA_USER, ostate, tp, NULL, req);
#endif
}
|
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
static int
|
|
|
|
tcp_getpcb(struct socket *so, struct inpcb **inp,
|
|
|
|
struct in6pcb **in6p, struct tcpcb **tp)
|
|
|
|
{
|
2014-08-05 11:10:41 +04:00
|
|
|
|
|
|
|
KASSERT(solocked(so));
|
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
/*
|
|
|
|
* When a TCP is attached to a socket, then there will be
|
|
|
|
* a (struct inpcb) pointed at by the socket, and this
|
|
|
|
* structure will point at a subsidary (struct tcpcb).
|
|
|
|
*/
|
|
|
|
switch (so->so_proto->pr_domain->dom_family) {
|
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
|
|
|
*inp = sotoinpcb(so);
|
|
|
|
if (*inp == NULL)
|
|
|
|
return EINVAL;
|
|
|
|
*tp = intotcpcb(*inp);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
*in6p = sotoin6pcb(so);
|
|
|
|
if (*in6p == NULL)
|
|
|
|
return EINVAL;
|
|
|
|
*tp = in6totcpcb(*in6p);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
}
|
|
|
|
|
|
|
|
KASSERT(tp != NULL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-06-20 19:29:17 +04:00
|
|
|
static void
|
|
|
|
change_keepalive(struct socket *so, struct tcpcb *tp)
|
|
|
|
{
|
|
|
|
tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl;
|
|
|
|
TCP_TIMER_DISARM(tp, TCPT_KEEP);
|
|
|
|
TCP_TIMER_DISARM(tp, TCPT_2MSL);
|
|
|
|
|
|
|
|
if (tp->t_state == TCPS_SYN_RECEIVED ||
|
|
|
|
tp->t_state == TCPS_SYN_SENT) {
|
|
|
|
TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
|
|
|
|
} else if (so->so_options & SO_KEEPALIVE &&
|
|
|
|
tp->t_state <= TCPS_CLOSE_WAIT) {
|
|
|
|
TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl);
|
|
|
|
} else {
|
|
|
|
TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
|
|
|
|
TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
|
|
|
|
}
|
|
|
|
|
2015-02-14 15:57:52 +03:00
|
|
|
/*
|
|
|
|
* Export TCP internal state information via a struct tcp_info, based on the
|
|
|
|
* Linux 2.6 API. Not ABI compatible as our constants are mapped differently
|
|
|
|
* (TCP state machine, etc). We export all information using FreeBSD-native
|
|
|
|
* constants -- for example, the numeric values for tcpi_state will differ
|
|
|
|
* from Linux.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
|
|
|
|
{
|
|
|
|
|
|
|
|
bzero(ti, sizeof(*ti));
|
|
|
|
|
|
|
|
ti->tcpi_state = tp->t_state;
|
|
|
|
if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
|
|
|
|
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
|
|
|
|
if (tp->t_flags & TF_SACK_PERMIT)
|
|
|
|
ti->tcpi_options |= TCPI_OPT_SACK;
|
|
|
|
if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
|
|
|
|
ti->tcpi_options |= TCPI_OPT_WSCALE;
|
|
|
|
ti->tcpi_snd_wscale = tp->snd_scale;
|
|
|
|
ti->tcpi_rcv_wscale = tp->rcv_scale;
|
|
|
|
}
|
|
|
|
if (tp->t_flags & TF_ECN_PERMIT) {
|
|
|
|
ti->tcpi_options |= TCPI_OPT_ECN;
|
|
|
|
}
|
|
|
|
|
|
|
|
ti->tcpi_rto = tp->t_rxtcur * tick;
|
|
|
|
ti->tcpi_last_data_recv = (long)(hardclock_ticks -
|
|
|
|
(int)tp->t_rcvtime) * tick;
|
|
|
|
ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
|
|
|
|
ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
|
|
|
|
|
|
|
|
ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
|
|
|
|
/* Linux API wants these in # of segments, apparently */
|
|
|
|
ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz;
|
|
|
|
ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* FreeBSD-specific extension fields for tcp_info.
|
|
|
|
*/
|
|
|
|
ti->tcpi_rcv_space = tp->rcv_wnd;
|
|
|
|
ti->tcpi_rcv_nxt = tp->rcv_nxt;
|
|
|
|
ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
|
|
|
|
ti->tcpi_snd_nxt = tp->snd_nxt;
|
|
|
|
ti->tcpi_snd_mss = tp->t_segsz;
|
|
|
|
ti->tcpi_rcv_mss = tp->t_segsz;
|
|
|
|
#ifdef TF_TOE
|
|
|
|
if (tp->t_flags & TF_TOE)
|
|
|
|
ti->tcpi_options |= TCPI_OPT_TOE;
|
|
|
|
#endif
|
|
|
|
/* From the redundant department of redundancies... */
|
|
|
|
ti->__tcpi_retransmits = ti->__tcpi_retrans =
|
|
|
|
ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
|
|
|
|
|
|
|
|
ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
|
|
|
|
ti->tcpi_snd_zerowin = tp->t_sndzerowin;
|
|
|
|
}
|
|
|
|
|
1994-01-09 02:07:16 +03:00
|
|
|
int
|
2008-08-06 19:01:23 +04:00
|
|
|
tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
1994-05-13 10:02:48 +04:00
|
|
|
int error = 0, s;
|
|
|
|
struct inpcb *inp;
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-03-30 16:51:13 +04:00
|
|
|
struct in6pcb *in6p;
|
1999-07-01 12:12:45 +04:00
|
|
|
#endif
|
2000-03-30 16:51:13 +04:00
|
|
|
struct tcpcb *tp;
|
2015-02-14 15:57:52 +03:00
|
|
|
struct tcp_info ti;
|
2007-06-20 19:29:17 +04:00
|
|
|
u_int ui;
|
1999-07-01 12:12:45 +04:00
|
|
|
int family; /* family of the socket */
|
2008-08-06 19:01:23 +04:00
|
|
|
int level, optname, optval;
|
|
|
|
|
|
|
|
level = sopt->sopt_level;
|
|
|
|
optname = sopt->sopt_name;
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
family = so->so_proto->pr_domain->dom_family;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1995-08-13 03:59:09 +04:00
|
|
|
s = splsoftnet();
|
1999-07-01 12:12:45 +04:00
|
|
|
switch (family) {
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
case PF_INET:
|
|
|
|
inp = sotoinpcb(so);
|
|
|
|
#ifdef INET6
|
|
|
|
in6p = NULL;
|
|
|
|
#endif
|
|
|
|
break;
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
inp = NULL;
|
|
|
|
in6p = sotoin6pcb(so);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
splx(s);
|
2006-11-10 16:19:16 +03:00
|
|
|
panic("%s: af %d", __func__, family);
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
|
|
|
#ifndef INET6
|
|
|
|
if (inp == NULL)
|
|
|
|
#else
|
|
|
|
if (inp == NULL && in6p == NULL)
|
|
|
|
#endif
|
|
|
|
{
|
1994-05-13 10:02:48 +04:00
|
|
|
splx(s);
|
|
|
|
return (ECONNRESET);
|
|
|
|
}
|
|
|
|
if (level != IPPROTO_TCP) {
|
1999-07-01 12:12:45 +04:00
|
|
|
switch (family) {
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
case PF_INET:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = ip_ctloutput(op, so, sopt);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = ip6_ctloutput(op, so, sopt);
|
1999-07-01 12:12:45 +04:00
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
}
|
1994-05-13 10:02:48 +04:00
|
|
|
splx(s);
|
|
|
|
return (error);
|
|
|
|
}
|
1999-07-01 12:12:45 +04:00
|
|
|
if (inp)
|
|
|
|
tp = intotcpcb(inp);
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p)
|
|
|
|
tp = in6totcpcb(in6p);
|
|
|
|
#endif
|
|
|
|
else
|
|
|
|
tp = NULL;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case PRCO_SETOPT:
|
|
|
|
switch (optname) {
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
#ifdef TCP_SIGNATURE
|
|
|
|
case TCP_MD5SIG:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_getint(sopt, &optval);
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
if (error)
|
|
|
|
break;
|
2008-08-06 19:01:23 +04:00
|
|
|
if (optval > 0)
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
tp->t_flags |= TF_SIGNATURE;
|
2004-05-18 18:44:14 +04:00
|
|
|
else
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
tp->t_flags &= ~TF_SIGNATURE;
|
|
|
|
break;
|
|
|
|
#endif /* TCP_SIGNATURE */
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
case TCP_NODELAY:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_getint(sopt, &optval);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
if (optval)
|
1993-03-21 12:45:37 +03:00
|
|
|
tp->t_flags |= TF_NODELAY;
|
|
|
|
else
|
|
|
|
tp->t_flags &= ~TF_NODELAY;
|
|
|
|
break;
|
|
|
|
|
1994-05-13 10:02:48 +04:00
|
|
|
case TCP_MAXSEG:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_getint(sopt, &optval);
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
if (optval > 0 && optval <= tp->t_peermss)
|
|
|
|
tp->t_peermss = optval; /* limit on send size */
|
1994-05-13 10:02:48 +04:00
|
|
|
else
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
2006-10-09 20:27:07 +04:00
|
|
|
#ifdef notyet
|
|
|
|
case TCP_CONGCTL:
|
2008-08-06 19:01:23 +04:00
|
|
|
/* XXX string overflow XXX */
|
|
|
|
error = tcp_congctl_select(tp, sopt->sopt_data);
|
2006-10-09 20:27:07 +04:00
|
|
|
break;
|
2008-08-06 19:01:23 +04:00
|
|
|
#endif
|
1994-05-13 10:02:48 +04:00
|
|
|
|
2007-06-20 19:29:17 +04:00
|
|
|
case TCP_KEEPIDLE:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_get(sopt, &ui, sizeof(ui));
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
if (ui > 0) {
|
2007-06-20 19:29:17 +04:00
|
|
|
tp->t_keepidle = ui;
|
|
|
|
change_keepalive(so, tp);
|
|
|
|
} else
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCP_KEEPINTVL:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_get(sopt, &ui, sizeof(ui));
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
if (ui > 0) {
|
2007-06-20 19:29:17 +04:00
|
|
|
tp->t_keepintvl = ui;
|
|
|
|
change_keepalive(so, tp);
|
|
|
|
} else
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCP_KEEPCNT:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_get(sopt, &ui, sizeof(ui));
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
if (ui > 0) {
|
2007-06-20 19:29:17 +04:00
|
|
|
tp->t_keepcnt = ui;
|
|
|
|
change_keepalive(so, tp);
|
|
|
|
} else
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCP_KEEPINIT:
|
2008-08-06 19:01:23 +04:00
|
|
|
error = sockopt_get(sopt, &ui, sizeof(ui));
|
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
if (ui > 0) {
|
2007-06-20 19:29:17 +04:00
|
|
|
tp->t_keepinit = ui;
|
|
|
|
change_keepalive(so, tp);
|
|
|
|
} else
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
default:
|
1994-05-13 10:02:48 +04:00
|
|
|
error = ENOPROTOOPT;
|
1993-03-21 12:45:37 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case PRCO_GETOPT:
|
|
|
|
switch (optname) {
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
#ifdef TCP_SIGNATURE
|
|
|
|
case TCP_MD5SIG:
|
2008-08-06 19:01:23 +04:00
|
|
|
optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
|
|
|
|
error = sockopt_set(sopt, &optval, sizeof(optval));
|
Initial commit of a port of the FreeBSD implementation of RFC 2385
(MD5 signatures for TCP, as used with BGP). Credit for original
FreeBSD code goes to Bruce M. Simpson, with FreeBSD sponsorship
credited to sentex.net. Shortening of the setsockopt() name
attributed to Vincent Jardin.
This commit is a minimal, working version of the FreeBSD code, as
MFC'ed to FreeBSD-4. It has received minimal testing with a ttcp
modified to set the TCP-MD5 option; BMS's additions to tcpdump-current
(tcpdump -M) confirm that the MD5 signatures are correct. Committed
as-is for further testing between a NetBSD BGP speaker (e.g., quagga)
and industry-standard BGP speakers (e.g., Cisco, Juniper).
NOTE: This version has two potential flaws. First, I do not see any code
that verifies received TCP-MD5 signatures. Second, the TCP-MD5
options are internally padded and assumed to be 32-bit aligned. A more
space-efficient scheme is to pack all TCP options densely (and
possibly unaligned) into the TCP header ; then do one final padding to
a 4-byte boundary. Pre-existing comments note that accounting for
TCP-option space when we add SACK is yet to be done. For now, I'm
punting on that; we can solve it properly, in a way that will handle
SACK blocks, as a separate exercise.
In case a pullup to NetBSD-2 is requested, this adds sys/netipsec/xform_tcp.c
,and modifies:
sys/net/pfkeyv2.h,v 1.15
sys/netinet/files.netinet,v 1.5
sys/netinet/ip.h,v 1.25
sys/netinet/tcp.h,v 1.15
sys/netinet/tcp_input.c,v 1.200
sys/netinet/tcp_output.c,v 1.109
sys/netinet/tcp_subr.c,v 1.165
sys/netinet/tcp_usrreq.c,v 1.89
sys/netinet/tcp_var.h,v 1.109
sys/netipsec/files.netipsec,v 1.3
sys/netipsec/ipsec.c,v 1.11
sys/netipsec/ipsec.h,v 1.7
sys/netipsec/key.c,v 1.11
share/man/man4/tcp.4,v 1.16
lib/libipsec/pfkey.c,v 1.20
lib/libipsec/pfkey_dump.c,v 1.17
lib/libipsec/policy_token.l,v 1.8
sbin/setkey/parse.y,v 1.14
sbin/setkey/setkey.8,v 1.27
sbin/setkey/token.l,v 1.15
Note that the preceding two revisions to tcp.4 will be
required to cleanly apply this diff.
2004-04-26 02:25:03 +04:00
|
|
|
break;
|
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
case TCP_NODELAY:
|
2008-08-06 19:01:23 +04:00
|
|
|
optval = tp->t_flags & TF_NODELAY;
|
|
|
|
error = sockopt_set(sopt, &optval, sizeof(optval));
|
1993-03-21 12:45:37 +03:00
|
|
|
break;
|
|
|
|
case TCP_MAXSEG:
|
2008-08-06 19:01:23 +04:00
|
|
|
optval = tp->t_peermss;
|
|
|
|
error = sockopt_set(sopt, &optval, sizeof(optval));
|
1993-03-21 12:45:37 +03:00
|
|
|
break;
|
2015-02-14 15:57:52 +03:00
|
|
|
case TCP_INFO:
|
|
|
|
tcp_fill_info(tp, &ti);
|
|
|
|
error = sockopt_set(sopt, &ti, sizeof ti);
|
|
|
|
break;
|
2006-10-09 20:27:07 +04:00
|
|
|
#ifdef notyet
|
|
|
|
case TCP_CONGCTL:
|
|
|
|
break;
|
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
default:
|
1994-05-13 10:02:48 +04:00
|
|
|
error = ENOPROTOOPT;
|
1993-03-21 12:45:37 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
1994-05-13 10:02:48 +04:00
|
|
|
splx(s);
|
1993-03-21 12:45:37 +03:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-10-13 17:26:15 +03:00
|
|
|
/*
 * Default send and receive buffer reservations, in bytes.  tcp_attach()
 * hands tcp_sendspace/tcp_recvspace to soreserve() when a socket has no
 * buffer space reserved yet.  Either macro may be pre-defined at build
 * time to override the default.
 *
 * The replacement lists are parenthesized so the macros expand as a single
 * operand in any expression (e.g. "x % TCP_SENDSPACE").
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024 * 32)
#endif
int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024 * 32)
#endif
int	tcp_recvspace = TCP_RECVSPACE;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
|
|
|
/*
|
2014-05-19 06:51:24 +04:00
|
|
|
* tcp_attach: attach TCP protocol to socket, allocating internet protocol
|
|
|
|
* control block, TCP control block, buffer space and entering LISTEN state
|
|
|
|
* if to accept connections.
|
1993-03-21 12:45:37 +03:00
|
|
|
*/
|
2014-05-19 06:51:24 +04:00
|
|
|
static int
|
|
|
|
tcp_attach(struct socket *so, int proto)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
2000-03-30 16:51:13 +04:00
|
|
|
struct tcpcb *tp;
|
1993-03-21 12:45:37 +03:00
|
|
|
struct inpcb *inp;
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
|
|
|
struct in6pcb *in6p;
|
|
|
|
#endif
|
2014-05-19 06:51:24 +04:00
|
|
|
int s, error, family;
|
|
|
|
|
|
|
|
/* Assign the lock (must happen even if we will error out). */
|
|
|
|
s = splsoftnet();
|
|
|
|
sosetlock(so);
|
|
|
|
KASSERT(solocked(so));
|
1999-07-01 12:12:45 +04:00
|
|
|
|
|
|
|
family = so->so_proto->pr_domain->dom_family;
|
2014-05-19 06:51:24 +04:00
|
|
|
switch (family) {
|
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
|
|
|
inp = sotoinpcb(so);
|
|
|
|
#ifdef INET6
|
|
|
|
in6p = NULL;
|
|
|
|
#endif
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
inp = NULL;
|
|
|
|
in6p = sotoin6pcb(so);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
KASSERT(inp == NULL);
|
|
|
|
#ifdef INET6
|
|
|
|
KASSERT(in6p == NULL);
|
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
|
2003-02-26 09:31:08 +03:00
|
|
|
#ifdef MBUFTRACE
|
2006-12-06 12:10:45 +03:00
|
|
|
so->so_mowner = &tcp_sock_mowner;
|
|
|
|
so->so_rcv.sb_mowner = &tcp_sock_rx_mowner;
|
|
|
|
so->so_snd.sb_mowner = &tcp_sock_tx_mowner;
|
2003-02-26 09:31:08 +03:00
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
|
|
|
|
error = soreserve(so, tcp_sendspace, tcp_recvspace);
|
|
|
|
if (error)
|
2014-05-19 06:51:24 +04:00
|
|
|
goto out;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
2007-08-02 06:42:40 +04:00
|
|
|
|
|
|
|
so->so_rcv.sb_flags |= SB_AUTOSIZE;
|
|
|
|
so->so_snd.sb_flags |= SB_AUTOSIZE;
|
|
|
|
|
1999-07-01 12:12:45 +04:00
|
|
|
switch (family) {
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
case PF_INET:
|
|
|
|
error = in_pcballoc(so, &tcbtable);
|
|
|
|
if (error)
|
2014-05-19 06:51:24 +04:00
|
|
|
goto out;
|
1999-07-01 12:12:45 +04:00
|
|
|
inp = sotoinpcb(so);
|
|
|
|
#ifdef INET6
|
|
|
|
in6p = NULL;
|
|
|
|
#endif
|
|
|
|
break;
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
2003-09-04 13:16:57 +04:00
|
|
|
error = in6_pcballoc(so, &tcbtable);
|
1999-07-01 12:12:45 +04:00
|
|
|
if (error)
|
2014-05-19 06:51:24 +04:00
|
|
|
goto out;
|
1999-07-01 12:12:45 +04:00
|
|
|
inp = NULL;
|
|
|
|
in6p = sotoin6pcb(so);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
2014-05-19 06:51:24 +04:00
|
|
|
error = EAFNOSUPPORT;
|
|
|
|
goto out;
|
1999-07-01 12:12:45 +04:00
|
|
|
}
|
|
|
|
if (inp)
|
|
|
|
tp = tcp_newtcpcb(family, (void *)inp);
|
|
|
|
#ifdef INET6
|
|
|
|
else if (in6p)
|
|
|
|
tp = tcp_newtcpcb(family, (void *)in6p);
|
|
|
|
#endif
|
1999-07-02 16:45:32 +04:00
|
|
|
else
|
|
|
|
tp = NULL;
|
1999-07-01 12:12:45 +04:00
|
|
|
|
2014-05-19 06:51:24 +04:00
|
|
|
if (tp == NULL) {
|
1993-03-21 12:45:37 +03:00
|
|
|
int nofd = so->so_state & SS_NOFDREF; /* XXX */
|
|
|
|
|
|
|
|
so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
|
2000-10-17 07:06:42 +04:00
|
|
|
#ifdef INET
|
1999-07-01 12:12:45 +04:00
|
|
|
if (inp)
|
|
|
|
in_pcbdetach(inp);
|
2000-10-17 07:06:42 +04:00
|
|
|
#endif
|
1999-07-01 12:12:45 +04:00
|
|
|
#ifdef INET6
|
2000-10-17 07:06:42 +04:00
|
|
|
if (in6p)
|
1999-07-01 12:12:45 +04:00
|
|
|
in6_pcbdetach(in6p);
|
|
|
|
#endif
|
1993-03-21 12:45:37 +03:00
|
|
|
so->so_state |= nofd;
|
2014-05-19 06:51:24 +04:00
|
|
|
error = ENOBUFS;
|
|
|
|
goto out;
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
tp->t_state = TCPS_CLOSED;
|
2014-05-19 06:51:24 +04:00
|
|
|
if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
|
|
|
|
so->so_linger = TCP_LINGERTIME;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
KASSERT(solocked(so));
|
|
|
|
splx(s);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tcp_detach(struct socket *so)
|
|
|
|
{
|
2014-07-30 10:53:53 +04:00
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
2014-05-19 06:51:24 +04:00
|
|
|
struct tcpcb *tp = NULL;
|
2014-07-30 10:53:53 +04:00
|
|
|
int s;
|
2014-05-19 06:51:24 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if (tcp_getpcb(so, &inp, &in6p, &tp) != 0)
|
2014-05-19 06:51:24 +04:00
|
|
|
return;
|
2014-07-30 10:53:53 +04:00
|
|
|
|
|
|
|
s = splsoftnet();
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
(void)tcp_disconnect1(tp);
|
2014-05-19 06:51:24 +04:00
|
|
|
splx(s);
|
1993-03-21 12:45:37 +03:00
|
|
|
}
|
|
|
|
|
2014-07-09 18:41:42 +04:00
|
|
|
static int
|
2015-04-25 01:32:37 +03:00
|
|
|
tcp_accept(struct socket *so, struct sockaddr *nam)
|
2014-07-09 18:41:42 +04:00
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
2014-07-30 10:53:53 +04:00
|
|
|
int error = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
2014-07-09 18:41:42 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
2014-07-09 18:41:42 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_ACCEPT);
|
2014-07-09 18:41:42 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Accept a connection. Essentially all the work is
|
|
|
|
* done at higher levels; just return the address
|
|
|
|
* of the peer, storing through addr.
|
|
|
|
*/
|
2014-08-02 07:55:26 +04:00
|
|
|
s = splsoftnet();
|
2014-07-09 18:41:42 +04:00
|
|
|
#ifdef INET
|
2014-07-10 18:05:19 +04:00
|
|
|
if (inp) {
|
2015-04-25 01:32:37 +03:00
|
|
|
in_setpeeraddr(inp, (struct sockaddr_in *)nam);
|
2014-07-10 18:05:19 +04:00
|
|
|
}
|
2014-07-09 18:41:42 +04:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
2014-07-10 18:05:19 +04:00
|
|
|
if (in6p) {
|
2015-04-25 01:32:37 +03:00
|
|
|
in6_setpeeraddr(in6p, (struct sockaddr_in6 *)nam);
|
2014-07-10 18:05:19 +04:00
|
|
|
}
|
2014-07-09 18:41:42 +04:00
|
|
|
#endif
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_ACCEPT);
|
2014-08-02 07:55:26 +04:00
|
|
|
splx(s);
|
2014-07-30 10:53:53 +04:00
|
|
|
|
2014-07-09 18:41:42 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-07-24 19:12:03 +04:00
|
|
|
static int
|
2015-04-03 23:01:07 +03:00
|
|
|
tcp_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
|
2014-07-24 19:12:03 +04:00
|
|
|
{
|
2014-07-30 10:53:53 +04:00
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
2015-04-03 23:01:07 +03:00
|
|
|
struct sockaddr_in *sin = (struct sockaddr_in *)nam;
|
|
|
|
#ifdef INET6
|
|
|
|
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
|
|
|
|
#endif /* INET6 */
|
2014-07-24 19:12:03 +04:00
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int s;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
2014-07-24 19:12:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_BIND);
|
2014-07-24 19:12:03 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Give the socket an address.
|
|
|
|
*/
|
2014-07-30 10:53:53 +04:00
|
|
|
s = splsoftnet();
|
|
|
|
switch (so->so_proto->pr_domain->dom_family) {
|
2014-07-24 19:12:03 +04:00
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
2015-04-03 23:01:07 +03:00
|
|
|
error = in_pcbbind(inp, sin, l);
|
2014-07-24 19:12:03 +04:00
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
2015-04-03 23:01:07 +03:00
|
|
|
error = in6_pcbbind(in6p, sin6, l);
|
2014-07-24 19:12:03 +04:00
|
|
|
if (!error) {
|
|
|
|
/* mapped addr case */
|
|
|
|
if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr))
|
|
|
|
tp->t_family = AF_INET;
|
|
|
|
else
|
|
|
|
tp->t_family = AF_INET6;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_BIND);
|
|
|
|
splx(s);
|
2014-07-30 10:53:53 +04:00
|
|
|
|
|
|
|
return error;
|
2014-07-24 19:12:03 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2014-08-05 09:24:26 +04:00
|
|
|
tcp_listen(struct socket *so, struct lwp *l)
|
2014-07-24 19:12:03 +04:00
|
|
|
{
|
2014-07-30 10:53:53 +04:00
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
2014-07-24 19:12:03 +04:00
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
2014-07-24 19:12:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
2014-07-24 19:12:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_LISTEN);
|
2014-07-24 19:12:03 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Prepare to accept connections.
|
|
|
|
*/
|
2014-07-30 10:53:53 +04:00
|
|
|
s = splsoftnet();
|
2014-07-24 19:12:03 +04:00
|
|
|
#ifdef INET
|
|
|
|
if (inp && inp->inp_lport == 0) {
|
2014-08-05 09:24:26 +04:00
|
|
|
error = in_pcbbind(inp, NULL, l);
|
2014-07-24 19:12:03 +04:00
|
|
|
if (error)
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
if (in6p && in6p->in6p_lport == 0) {
|
2014-08-05 09:24:26 +04:00
|
|
|
error = in6_pcbbind(in6p, NULL, l);
|
2014-07-24 19:12:03 +04:00
|
|
|
if (error)
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
tp->t_state = TCPS_LISTEN;
|
|
|
|
|
|
|
|
release:
|
2014-07-30 10:53:53 +04:00
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_LISTEN);
|
2014-07-24 19:12:03 +04:00
|
|
|
splx(s);
|
2014-07-30 10:53:53 +04:00
|
|
|
|
|
|
|
return error;
|
2014-07-24 19:12:03 +04:00
|
|
|
}
|
|
|
|
|
2014-07-30 14:04:25 +04:00
|
|
|
static int
|
2015-05-02 20:18:03 +03:00
|
|
|
tcp_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
|
2014-07-30 14:04:25 +04:00
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int s;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_CONNECT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initiate connection to peer.
|
|
|
|
* Create a template for use in transmissions on this connection.
|
|
|
|
* Enter SYN_SENT state, and mark socket as connecting.
|
|
|
|
* Start keep-alive timer, and seed output sequence space.
|
|
|
|
* Send initial segment on connection.
|
|
|
|
*/
|
2014-08-02 07:55:26 +04:00
|
|
|
s = splsoftnet();
|
2014-07-30 14:04:25 +04:00
|
|
|
#ifdef INET
|
|
|
|
if (inp) {
|
|
|
|
if (inp->inp_lport == 0) {
|
2014-08-05 09:24:26 +04:00
|
|
|
error = in_pcbbind(inp, NULL, l);
|
2014-07-30 14:04:25 +04:00
|
|
|
if (error)
|
|
|
|
goto release;
|
|
|
|
}
|
2015-05-02 20:18:03 +03:00
|
|
|
error = in_pcbconnect(inp, (struct sockaddr_in *)nam, l);
|
2014-07-30 14:04:25 +04:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
if (in6p) {
|
|
|
|
if (in6p->in6p_lport == 0) {
|
2014-08-05 09:24:26 +04:00
|
|
|
error = in6_pcbbind(in6p, NULL, l);
|
2014-07-30 14:04:25 +04:00
|
|
|
if (error)
|
|
|
|
goto release;
|
|
|
|
}
|
2015-05-02 20:18:03 +03:00
|
|
|
error = in6_pcbconnect(in6p, (struct sockaddr_in6 *)nam, l);
|
2014-07-30 14:04:25 +04:00
|
|
|
if (!error) {
|
|
|
|
/* mapped addr case */
|
|
|
|
if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr))
|
|
|
|
tp->t_family = AF_INET;
|
|
|
|
else
|
|
|
|
tp->t_family = AF_INET6;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (error)
|
|
|
|
goto release;
|
|
|
|
tp->t_template = tcp_template(tp);
|
|
|
|
if (tp->t_template == 0) {
|
|
|
|
#ifdef INET
|
|
|
|
if (inp)
|
|
|
|
in_pcbdisconnect(inp);
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
if (in6p)
|
|
|
|
in6_pcbdisconnect(in6p);
|
|
|
|
#endif
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto release;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Compute window scaling to request.
|
|
|
|
* XXX: This should be moved to tcp_output().
|
|
|
|
*/
|
|
|
|
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
|
|
|
|
(TCP_MAXWIN << tp->request_r_scale) < sb_max)
|
|
|
|
tp->request_r_scale++;
|
|
|
|
soisconnecting(so);
|
|
|
|
TCP_STATINC(TCP_STAT_CONNATTEMPT);
|
|
|
|
tp->t_state = TCPS_SYN_SENT;
|
|
|
|
TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
|
|
|
|
tp->iss = tcp_new_iss(tp, 0);
|
|
|
|
tcp_sendseqinit(tp);
|
|
|
|
error = tcp_output(tp);
|
|
|
|
|
|
|
|
release:
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_CONNECT);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2014-08-09 09:33:00 +04:00
|
|
|
static int
|
|
|
|
tcp_connect2(struct socket *so, struct socket *so2)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
|
|
|
|
|
|
|
KASSERT(solocked(so));
|
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_CONNECT2);
|
|
|
|
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_CONNECT2);
|
|
|
|
|
|
|
|
return EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
static int
|
|
|
|
tcp_disconnect(struct socket *so)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_DISCONNECT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initiate disconnect from peer.
|
|
|
|
* If connection never passed embryonic stage, just drop;
|
|
|
|
* else if don't need to let data drain, then can just drop anyways,
|
|
|
|
* else have to begin TCP shutdown process: mark socket disconnecting,
|
|
|
|
* drain unread data, state switch to reflect user close, and
|
|
|
|
* send segment (e.g. FIN) to peer. Socket will be really disconnected
|
|
|
|
* when peer sends FIN and acks ours.
|
|
|
|
*
|
|
|
|
* SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
|
|
|
|
*/
|
|
|
|
s = splsoftnet();
|
|
|
|
tp = tcp_disconnect1(tp);
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_DISCONNECT);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
tcp_shutdown(struct socket *so)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_SHUTDOWN);
|
|
|
|
/*
|
|
|
|
* Mark the connection as being incapable of further output.
|
|
|
|
*/
|
|
|
|
s = splsoftnet();
|
|
|
|
socantsendmore(so);
|
|
|
|
tp = tcp_usrclosed(tp);
|
|
|
|
if (tp)
|
|
|
|
error = tcp_output(tp);
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_SHUTDOWN);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
tcp_abort(struct socket *so)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int error = 0;
|
|
|
|
int ostate = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_ABORT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Abort the TCP.
|
|
|
|
*/
|
|
|
|
s = splsoftnet();
|
|
|
|
tp = tcp_drop(tp, ECONNABORTED);
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_ABORT);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2014-06-22 12:10:18 +04:00
|
|
|
static int
|
2014-07-01 09:49:18 +04:00
|
|
|
tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
|
2014-06-22 12:10:18 +04:00
|
|
|
{
|
|
|
|
switch (so->so_proto->pr_domain->dom_family) {
|
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
2014-07-01 09:49:18 +04:00
|
|
|
return in_control(so, cmd, nam, ifp);
|
2014-06-22 12:10:18 +04:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
2014-07-01 09:49:18 +04:00
|
|
|
return in6_control(so, cmd, nam, ifp);
|
2014-06-22 12:10:18 +04:00
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-06 07:33:33 +04:00
|
|
|
/*
 * tcp_stat: stat request on a TCP socket.
 *
 * Only asserts that the socket is locked; ub is left untouched (no
 * blocksize is reported).  Always returns 0.
 */
static int
tcp_stat(struct socket *so, struct stat *ub)
{
	const int rv = 0;

	KASSERT(solocked(so));

	/* Nothing to fill in: we don't bother with a blocksize. */
	return rv;
}
|
|
|
|
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
static int
|
2015-04-25 01:32:37 +03:00
|
|
|
tcp_peeraddr(struct socket *so, struct sockaddr *nam)
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
2014-07-30 10:53:53 +04:00
|
|
|
int error = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_PEERADDR);
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
2014-08-02 07:55:26 +04:00
|
|
|
s = splsoftnet();
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#ifdef INET
|
2015-04-25 01:32:37 +03:00
|
|
|
if (inp) {
|
|
|
|
in_setpeeraddr(inp, (struct sockaddr_in *)nam);
|
|
|
|
}
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
2015-04-25 01:32:37 +03:00
|
|
|
if (in6p) {
|
|
|
|
in6_setpeeraddr(in6p, (struct sockaddr_in6 *)nam);
|
|
|
|
}
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#endif
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_PEERADDR);
|
2014-08-02 07:55:26 +04:00
|
|
|
splx(s);
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2015-04-25 01:32:37 +03:00
|
|
|
tcp_sockaddr(struct socket *so, struct sockaddr *nam)
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
2014-07-30 10:53:53 +04:00
|
|
|
int error = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_SOCKADDR);
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
2014-08-02 07:55:26 +04:00
|
|
|
s = splsoftnet();
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#ifdef INET
|
2015-04-25 01:32:37 +03:00
|
|
|
if (inp) {
|
|
|
|
in_setsockaddr(inp, (struct sockaddr_in *)nam);
|
|
|
|
}
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
2015-04-25 01:32:37 +03:00
|
|
|
if (in6p) {
|
|
|
|
in6_setsockaddr(in6p, (struct sockaddr_in6 *)nam);
|
|
|
|
}
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#endif
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_SOCKADDR);
|
2014-08-02 07:55:26 +04:00
|
|
|
splx(s);
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-08-08 07:05:44 +04:00
|
|
|
static int
|
|
|
|
tcp_rcvd(struct socket *so, int flags, struct lwp *l)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
|
|
|
int error = 0;
|
|
|
|
int s;
|
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_RCVD);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* After a receive, possibly send window update to peer.
|
|
|
|
*
|
|
|
|
* soreceive() calls this function when a user receives
|
|
|
|
* ancillary data on a listening socket. We don't call
|
|
|
|
* tcp_output in such a case, since there is no header
|
|
|
|
* template for a listening socket and hence the kernel
|
|
|
|
* will panic.
|
|
|
|
*/
|
|
|
|
s = splsoftnet();
|
|
|
|
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
|
|
|
|
(void) tcp_output(tp);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_RCVD);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-07-23 17:17:18 +04:00
|
|
|
static int
|
|
|
|
tcp_recvoob(struct socket *so, struct mbuf *m, int flags)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
2014-07-30 10:53:53 +04:00
|
|
|
int error = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_RCVOOB);
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-08-02 07:55:26 +04:00
|
|
|
s = splsoftnet();
|
2014-07-23 17:17:18 +04:00
|
|
|
if ((so->so_oobmark == 0 &&
|
|
|
|
(so->so_state & SS_RCVATMARK) == 0) ||
|
|
|
|
so->so_options & SO_OOBINLINE ||
|
2014-08-02 07:55:26 +04:00
|
|
|
tp->t_oobflags & TCPOOB_HADDATA) {
|
|
|
|
splx(s);
|
2014-07-23 17:17:18 +04:00
|
|
|
return EINVAL;
|
2014-08-02 07:55:26 +04:00
|
|
|
}
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-08-02 07:55:26 +04:00
|
|
|
if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
|
|
|
|
splx(s);
|
2014-07-23 17:17:18 +04:00
|
|
|
return EWOULDBLOCK;
|
2014-08-02 07:55:26 +04:00
|
|
|
}
|
2014-07-23 17:17:18 +04:00
|
|
|
|
|
|
|
m->m_len = 1;
|
|
|
|
*mtod(m, char *) = tp->t_iobc;
|
|
|
|
if ((flags & MSG_PEEK) == 0)
|
|
|
|
tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
|
|
|
|
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_RCVOOB);
|
2014-08-02 07:55:26 +04:00
|
|
|
splx(s);
|
2014-07-23 17:17:18 +04:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-08-05 11:55:31 +04:00
|
|
|
static int
|
2015-05-02 20:18:03 +03:00
|
|
|
tcp_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
|
2014-08-05 11:55:31 +04:00
|
|
|
struct mbuf *control, struct lwp *l)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
|
|
|
int error = 0;
|
|
|
|
int s;
|
|
|
|
|
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
ostate = tcp_debug_capture(tp, PRU_SEND);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do a send by putting data in output queue and updating urgent
|
|
|
|
* marker if URG set. Possibly send more data.
|
|
|
|
*/
|
|
|
|
s = splsoftnet();
|
|
|
|
if (control && control->m_len) {
|
|
|
|
m_freem(control);
|
|
|
|
m_freem(m);
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_SEND);
|
|
|
|
splx(s);
|
|
|
|
return EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
sbappendstream(&so->so_snd, m);
|
|
|
|
error = tcp_output(tp);
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_SEND);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2014-07-23 17:17:18 +04:00
|
|
|
static int
|
|
|
|
tcp_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
|
|
|
|
{
|
|
|
|
struct inpcb *inp = NULL;
|
|
|
|
struct in6pcb *in6p = NULL;
|
|
|
|
struct tcpcb *tp = NULL;
|
|
|
|
int ostate = 0;
|
|
|
|
int error = 0;
|
2014-08-02 07:55:26 +04:00
|
|
|
int s;
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
if ((error = tcp_getpcb(so, &inp, &in6p, &tp)) != 0)
|
|
|
|
return error;
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-07-30 10:53:53 +04:00
|
|
|
ostate = tcp_debug_capture(tp, PRU_SENDOOB);
|
2014-07-23 17:17:18 +04:00
|
|
|
|
2014-08-02 07:55:26 +04:00
|
|
|
s = splsoftnet();
|
2014-07-23 17:17:18 +04:00
|
|
|
if (sbspace(&so->so_snd) < -512) {
|
|
|
|
m_freem(m);
|
2014-08-02 07:55:26 +04:00
|
|
|
splx(s);
|
2014-07-23 17:17:18 +04:00
|
|
|
return ENOBUFS;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* According to RFC961 (Assigned Protocols),
|
|
|
|
* the urgent pointer points to the last octet
|
|
|
|
* of urgent data. We continue, however,
|
|
|
|
* to consider it to indicate the first octet
|
|
|
|
* of data past the urgent section.
|
|
|
|
* Otherwise, snd_up should be one lower.
|
|
|
|
*/
|
|
|
|
sbappendstream(&so->so_snd, m);
|
|
|
|
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
|
|
|
|
tp->t_force = 1;
|
|
|
|
error = tcp_output(tp);
|
|
|
|
tp->t_force = 0;
|
|
|
|
tcp_debug_trace(so, tp, ostate, PRU_SENDOOB);
|
2014-08-02 07:55:26 +04:00
|
|
|
splx(s);
|
2014-07-23 17:17:18 +04:00
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2014-08-09 09:33:00 +04:00
|
|
|
static int
|
|
|
|
tcp_purgeif(struct socket *so, struct ifnet *ifp)
|
|
|
|
{
|
|
|
|
int s;
|
|
|
|
|
|
|
|
s = splsoftnet();
|
|
|
|
mutex_enter(softnet_lock);
|
|
|
|
switch (so->so_proto->pr_domain->dom_family) {
|
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
|
|
|
in_pcbpurgeif0(&tcbtable, ifp);
|
|
|
|
in_purgeif(ifp);
|
|
|
|
in_pcbpurgeif(&tcbtable, ifp);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
in6_pcbpurgeif0(&tcbtable, ifp);
|
|
|
|
in6_purgeif(ifp);
|
|
|
|
in6_pcbpurgeif(&tcbtable, ifp);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
default:
|
|
|
|
mutex_exit(softnet_lock);
|
|
|
|
splx(s);
|
|
|
|
return EAFNOSUPPORT;
|
|
|
|
}
|
|
|
|
mutex_exit(softnet_lock);
|
|
|
|
splx(s);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
1993-03-21 12:45:37 +03:00
|
|
|
/*
|
|
|
|
* Initiate (or continue) disconnect.
|
|
|
|
* If embryonic state, just send reset (once).
|
|
|
|
* If in ``let data drain'' option and linger null, just drop.
|
|
|
|
* Otherwise (hard), mark socket disconnecting and drop
|
|
|
|
* current input data; switch states based on user close, and
|
|
|
|
* send segment to peer (with FIN).
|
|
|
|
*/
|
|
|
|
struct tcpcb *
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
tcp_disconnect1(struct tcpcb *tp)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
1999-07-01 12:12:45 +04:00
|
|
|
struct socket *so;
|
|
|
|
|
|
|
|
if (tp->t_inpcb)
|
|
|
|
so = tp->t_inpcb->inp_socket;
|
|
|
|
#ifdef INET6
|
|
|
|
else if (tp->t_in6pcb)
|
|
|
|
so = tp->t_in6pcb->in6p_socket;
|
|
|
|
#endif
|
|
|
|
else
|
|
|
|
so = NULL;
|
1993-03-21 12:45:37 +03:00
|
|
|
|
1994-10-14 19:01:48 +03:00
|
|
|
if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
|
1993-03-21 12:45:37 +03:00
|
|
|
tp = tcp_close(tp);
|
|
|
|
else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
|
|
|
|
tp = tcp_drop(tp, 0);
|
|
|
|
else {
|
|
|
|
soisdisconnecting(so);
|
|
|
|
sbflush(&so->so_rcv);
|
|
|
|
tp = tcp_usrclosed(tp);
|
|
|
|
if (tp)
|
|
|
|
(void) tcp_output(tp);
|
|
|
|
}
|
|
|
|
return (tp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* User issued close, and wish to trail through shutdown states:
|
|
|
|
* if never received SYN, just forget it. If got a SYN from peer,
|
|
|
|
* but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
|
|
|
|
* If already got a FIN from peer, then almost done; go to LAST_ACK
|
|
|
|
* state. In all other cases, have already sent FIN to peer (e.g.
|
|
|
|
* after PRU_SHUTDOWN), and just have to play tedious game waiting
|
|
|
|
* for peer to send FIN or not respond to keep-alives, etc.
|
|
|
|
* We can let the user exit from the close as soon as the FIN is acked.
|
|
|
|
*/
|
|
|
|
struct tcpcb *
|
2005-02-04 02:50:33 +03:00
|
|
|
tcp_usrclosed(struct tcpcb *tp)
|
1993-03-21 12:45:37 +03:00
|
|
|
{
|
|
|
|
|
|
|
|
switch (tp->t_state) {
|
|
|
|
|
|
|
|
case TCPS_CLOSED:
|
|
|
|
case TCPS_LISTEN:
|
|
|
|
case TCPS_SYN_SENT:
|
|
|
|
tp->t_state = TCPS_CLOSED;
|
|
|
|
tp = tcp_close(tp);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCPS_SYN_RECEIVED:
|
|
|
|
case TCPS_ESTABLISHED:
|
|
|
|
tp->t_state = TCPS_FIN_WAIT_1;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TCPS_CLOSE_WAIT:
|
|
|
|
tp->t_state = TCPS_LAST_ACK;
|
|
|
|
break;
|
|
|
|
}
|
1996-01-31 08:37:29 +03:00
|
|
|
if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
|
1999-07-01 12:12:45 +04:00
|
|
|
struct socket *so;
|
|
|
|
if (tp->t_inpcb)
|
|
|
|
so = tp->t_inpcb->inp_socket;
|
|
|
|
#ifdef INET6
|
|
|
|
else if (tp->t_in6pcb)
|
|
|
|
so = tp->t_in6pcb->in6p_socket;
|
|
|
|
#endif
|
|
|
|
else
|
|
|
|
so = NULL;
|
2006-04-15 03:17:24 +04:00
|
|
|
if (so)
|
|
|
|
soisdisconnected(so);
|
1996-01-31 08:42:37 +03:00
|
|
|
/*
|
|
|
|
* If we are in FIN_WAIT_2, we arrived here because the
|
|
|
|
* application did a shutdown of the send side. Like the
|
|
|
|
* case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
|
|
|
|
* a full close, we start a timer to make sure sockets are
|
|
|
|
* not left in FIN_WAIT_2 forever.
|
|
|
|
*/
|
2007-06-20 19:29:17 +04:00
|
|
|
if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
|
|
|
|
TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetimes Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable come from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
else if (tp->t_state == TCPS_TIME_WAIT
|
|
|
|
&& ((tp->t_inpcb
|
|
|
|
&& (tcp4_vtw_enable & 1)
|
|
|
|
&& vtw_add(AF_INET, tp))
|
|
|
|
||
|
|
|
|
(tp->t_in6pcb
|
|
|
|
&& (tcp6_vtw_enable & 1)
|
|
|
|
&& vtw_add(AF_INET6, tp)))) {
|
|
|
|
tp = 0;
|
|
|
|
}
|
1996-01-31 08:37:29 +03:00
|
|
|
}
|
1993-03-21 12:45:37 +03:00
|
|
|
return (tp);
|
|
|
|
}
|
1995-09-30 10:02:00 +03:00
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
/*
|
|
|
|
* sysctl helper routine for net.inet.ip.mssdflt. it can't be less
|
|
|
|
* than 32.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
int error, mssdflt;
|
|
|
|
struct sysctlnode node;
|
|
|
|
|
|
|
|
mssdflt = tcp_mssdflt;
|
|
|
|
node = *rnode;
|
|
|
|
node.sysctl_data = &mssdflt;
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
if (error || newp == NULL)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (mssdflt < 32)
|
|
|
|
return (EINVAL);
|
|
|
|
tcp_mssdflt = mssdflt;
|
|
|
|
|
2013-12-02 13:39:54 +04:00
|
|
|
mutex_enter(softnet_lock);
|
|
|
|
tcp_tcpcb_template();
|
|
|
|
mutex_exit(softnet_lock);
|
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
return (0);
|
|
|
|
}
|
1998-04-30 00:43:29 +04:00
|
|
|
|
2013-12-02 13:39:54 +04:00
|
|
|
/*
|
|
|
|
* sysctl helper for TCP CB template update
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
sysctl_update_tcpcb_template(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
int t, error;
|
|
|
|
struct sysctlnode node;
|
|
|
|
|
|
|
|
/* follow procedures in sysctl(9) manpage */
|
|
|
|
t = *(int *)rnode->sysctl_data;
|
|
|
|
node = *rnode;
|
|
|
|
node.sysctl_data = &t;
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
if (error || newp == NULL)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
if (t < 0)
|
|
|
|
return EINVAL;
|
|
|
|
|
|
|
|
*(int *)rnode->sysctl_data = t;
|
|
|
|
|
|
|
|
mutex_enter(softnet_lock);
|
|
|
|
tcp_tcpcb_template();
|
|
|
|
mutex_exit(softnet_lock);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
1995-09-30 10:02:00 +03:00
|
|
|
/*
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
* sysctl helper routine for setting port related values under
|
|
|
|
* net.inet.ip and net.inet6.ip6. does basic range checking and does
|
|
|
|
* additional checks for each type. this code has placed in
|
|
|
|
* tcp_input.c since INET and INET6 both use the same tcp code.
|
|
|
|
*
|
|
|
|
* this helper is not static so that both inet and inet6 can use it.
|
1995-09-30 10:02:00 +03:00
|
|
|
*/
|
|
|
|
int
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
sysctl_net_inet_ip_ports(SYSCTLFN_ARGS)
|
1995-09-30 10:02:00 +03:00
|
|
|
{
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
int error, tmp;
|
|
|
|
int apmin, apmax;
|
|
|
|
#ifndef IPNOPRIVPORTS
|
|
|
|
int lpmin, lpmax;
|
|
|
|
#endif /* IPNOPRIVPORTS */
|
|
|
|
struct sysctlnode node;
|
|
|
|
|
|
|
|
if (namelen != 0)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
switch (name[-3]) {
|
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
|
|
|
apmin = anonportmin;
|
|
|
|
apmax = anonportmax;
|
|
|
|
#ifndef IPNOPRIVPORTS
|
|
|
|
lpmin = lowportmin;
|
|
|
|
lpmax = lowportmax;
|
|
|
|
#endif /* IPNOPRIVPORTS */
|
|
|
|
break;
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
apmin = ip6_anonportmin;
|
|
|
|
apmax = ip6_anonportmax;
|
|
|
|
#ifndef IPNOPRIVPORTS
|
|
|
|
lpmin = ip6_lowportmin;
|
|
|
|
lpmax = ip6_lowportmax;
|
|
|
|
#endif /* IPNOPRIVPORTS */
|
|
|
|
break;
|
|
|
|
#endif /* INET6 */
|
|
|
|
default:
|
|
|
|
return (EINVAL);
|
1998-09-10 23:53:28 +04:00
|
|
|
}
|
1998-09-10 14:46:03 +04:00
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
/*
|
|
|
|
* insert temporary copy into node, perform lookup on
|
|
|
|
* temporary, then restore pointer
|
|
|
|
*/
|
|
|
|
node = *rnode;
|
|
|
|
tmp = *(int*)rnode->sysctl_data;
|
|
|
|
node.sysctl_data = &tmp;
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
if (error || newp == NULL)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* simple port range check
|
|
|
|
*/
|
|
|
|
if (tmp < 0 || tmp > 65535)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* per-node range checks
|
|
|
|
*/
|
|
|
|
switch (rnode->sysctl_num) {
|
|
|
|
case IPCTL_ANONPORTMIN:
|
2009-02-18 16:18:32 +03:00
|
|
|
case IPV6CTL_ANONPORTMIN:
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (tmp >= apmax)
|
|
|
|
return (EINVAL);
|
|
|
|
#ifndef IPNOPRIVPORTS
|
|
|
|
if (tmp < IPPORT_RESERVED)
|
|
|
|
return (EINVAL);
|
|
|
|
#endif /* IPNOPRIVPORTS */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPCTL_ANONPORTMAX:
|
2009-02-18 16:18:32 +03:00
|
|
|
case IPV6CTL_ANONPORTMAX:
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (apmin >= tmp)
|
|
|
|
return (EINVAL);
|
|
|
|
#ifndef IPNOPRIVPORTS
|
|
|
|
if (tmp < IPPORT_RESERVED)
|
|
|
|
return (EINVAL);
|
|
|
|
#endif /* IPNOPRIVPORTS */
|
|
|
|
break;
|
2003-04-20 00:58:35 +04:00
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#ifndef IPNOPRIVPORTS
|
|
|
|
case IPCTL_LOWPORTMIN:
|
2009-02-18 16:18:32 +03:00
|
|
|
case IPV6CTL_LOWPORTMIN:
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (tmp >= lpmax ||
|
|
|
|
tmp > IPPORT_RESERVEDMAX ||
|
|
|
|
tmp < IPPORT_RESERVEDMIN)
|
|
|
|
return (EINVAL);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case IPCTL_LOWPORTMAX:
|
2009-02-18 16:18:32 +03:00
|
|
|
case IPV6CTL_LOWPORTMAX:
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (lpmin >= tmp ||
|
|
|
|
tmp > IPPORT_RESERVEDMAX ||
|
|
|
|
tmp < IPPORT_RESERVEDMIN)
|
|
|
|
return (EINVAL);
|
|
|
|
break;
|
|
|
|
#endif /* IPNOPRIVPORTS */
|
|
|
|
|
|
|
|
default:
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
2003-04-20 00:58:35 +04:00
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
*(int*)rnode->sysctl_data = tmp;
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2007-06-26 03:35:12 +04:00
|
|
|
static inline int
|
|
|
|
copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp)
|
|
|
|
{
|
|
|
|
if (oldp) {
|
2013-10-04 20:20:35 +04:00
|
|
|
size_t sz;
|
|
|
|
uid_t uid;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (sockp->so_cred == NULL)
|
|
|
|
return EPERM;
|
|
|
|
|
|
|
|
uid = kauth_cred_geteuid(sockp->so_cred);
|
2007-06-26 03:35:12 +04:00
|
|
|
sz = MIN(sizeof(uid), *oldlenp);
|
2013-10-04 20:20:35 +04:00
|
|
|
if ((error = copyout(&uid, oldp, sz)) != 0)
|
2007-06-26 03:35:12 +04:00
|
|
|
return error;
|
|
|
|
}
|
2013-10-04 20:20:35 +04:00
|
|
|
*oldlenp = sizeof(uid_t);
|
2007-06-26 03:35:12 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
|
|
|
inet4_ident_core(struct in_addr raddr, u_int rport,
|
|
|
|
struct in_addr laddr, u_int lport,
|
|
|
|
void *oldp, size_t *oldlenp,
|
|
|
|
struct lwp *l, int dodrop)
|
|
|
|
{
|
|
|
|
struct inpcb *inp;
|
|
|
|
struct socket *sockp;
|
|
|
|
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetimes Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable come from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0);
|
2007-06-26 03:35:12 +04:00
|
|
|
|
|
|
|
if (inp == NULL || (sockp = inp->inp_socket) == NULL)
|
|
|
|
return ESRCH;
|
|
|
|
|
|
|
|
if (dodrop) {
|
|
|
|
struct tcpcb *tp;
|
2009-04-16 00:44:24 +04:00
|
|
|
int error;
|
2007-06-26 03:35:12 +04:00
|
|
|
|
|
|
|
if (inp == NULL || (tp = intotcpcb(inp)) == NULL ||
|
|
|
|
(inp->inp_socket->so_options & SO_ACCEPTCONN) != 0)
|
|
|
|
return ESRCH;
|
2009-04-16 00:44:24 +04:00
|
|
|
|
|
|
|
error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
|
|
|
|
KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL);
|
|
|
|
if (error)
|
|
|
|
return (error);
|
2007-06-26 03:35:12 +04:00
|
|
|
|
|
|
|
(void)tcp_drop(tp, ECONNABORTED);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return copyout_uid(sockp, oldp, oldlenp);
|
|
|
|
}
|
|
|
|
|
2007-06-26 13:19:36 +04:00
|
|
|
#ifdef INET6
/*
 * Common IPv6 back end for the ident and drop sysctl nodes.
 *
 * Looks up the connected PCB for the given remote/local address and
 * port pair in tcbtable.  Without dodrop, the owning socket's
 * effective uid is copied out via copyout_uid().  With dodrop, the
 * connection is dropped with ECONNABORTED, provided the socket is not
 * accepting connections (SO_ACCEPTCONN) and kauth authorizes the
 * caller's credentials (l->l_cred).
 *
 * Returns ESRCH if no suitable connection exists, the kauth error if
 * the drop is not authorized, otherwise 0 or the copyout_uid() result.
 */
static inline int
inet6_ident_core(struct in6_addr *raddr, u_int rport,
    struct in6_addr *laddr, u_int lport,
    void *oldp, size_t *oldlenp,
    struct lwp *l, int dodrop)
{
	struct in6pcb *in6p;
	struct socket *so;
	struct tcpcb *tp;
	int error;

	in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0, 0);
	if (in6p == NULL)
		return ESRCH;
	so = in6p->in6p_socket;
	if (so == NULL)
		return ESRCH;

	/* Plain ident query: report the uid of the socket's owner. */
	if (!dodrop)
		return copyout_uid(so, oldp, oldlenp);

	tp = in6totcpcb(in6p);
	if (tp == NULL || (so->so_options & SO_ACCEPTCONN) != 0)
		return ESRCH;

	error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
	    KAUTH_REQ_NETWORK_SOCKET_DROP, so, tp, NULL);
	if (error)
		return (error);

	(void)tcp_drop(tp, ECONNABORTED);
	return 0;
}
#endif
|
2007-06-26 03:35:12 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* sysctl helper routine for the net.inet.tcp.drop and
|
|
|
|
* net.inet6.tcp6.drop nodes.
|
|
|
|
*/
|
|
|
|
#define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident
|
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
/*
|
|
|
|
* sysctl helper routine for the net.inet.tcp.ident and
|
|
|
|
* net.inet6.tcp6.ident nodes. contains backwards compat code for the
|
|
|
|
* old way of looking up the ident information for ipv4 which involves
|
|
|
|
* stuffing the port/addr pairs into the mib lookup.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS)
|
2003-04-20 00:58:35 +04:00
|
|
|
{
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#ifdef INET
|
|
|
|
struct sockaddr_in *si4[2];
|
|
|
|
#endif /* INET */
|
|
|
|
#ifdef INET6
|
|
|
|
struct sockaddr_in6 *si6[2];
|
|
|
|
#endif /* INET6 */
|
|
|
|
struct sockaddr_storage sa[2];
|
2008-08-20 22:35:20 +04:00
|
|
|
int error, pf, dodrop;
|
2007-06-26 03:35:12 +04:00
|
|
|
|
|
|
|
dodrop = name[-1] == TCPCTL_DROP;
|
|
|
|
if (dodrop) {
|
|
|
|
if (oldp != NULL || *oldlenp != 0)
|
|
|
|
return EINVAL;
|
|
|
|
if (newp == NULL)
|
|
|
|
return EPERM;
|
|
|
|
if (newlen < sizeof(sa))
|
|
|
|
return ENOMEM;
|
|
|
|
}
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (namelen != 4 && namelen != 0)
|
2007-06-26 03:35:12 +04:00
|
|
|
return EINVAL;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (name[-2] != IPPROTO_TCP)
|
2007-06-26 03:35:12 +04:00
|
|
|
return EINVAL;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
pf = name[-3];
|
|
|
|
|
|
|
|
/* old style lookup, ipv4 only */
|
|
|
|
if (namelen == 4) {
|
|
|
|
#ifdef INET
|
2004-03-29 08:59:02 +04:00
|
|
|
struct in_addr laddr, raddr;
|
|
|
|
u_int lport, rport;
|
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
if (pf != PF_INET)
|
2007-06-26 03:35:12 +04:00
|
|
|
return EPROTONOSUPPORT;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
raddr.s_addr = (uint32_t)name[0];
|
|
|
|
rport = (u_int)name[1];
|
|
|
|
laddr.s_addr = (uint32_t)name[2];
|
|
|
|
lport = (u_int)name[3];
|
2007-06-26 03:35:12 +04:00
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_enter(softnet_lock);
|
2007-06-26 03:35:12 +04:00
|
|
|
error = inet4_ident_core(raddr, rport, laddr, lport,
|
|
|
|
oldp, oldlenp, l, dodrop);
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_exit(softnet_lock);
|
2007-06-26 03:35:12 +04:00
|
|
|
return error;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#else /* INET */
|
2007-06-26 03:35:12 +04:00
|
|
|
return EINVAL;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#endif /* INET */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (newp == NULL || newlen != sizeof(sa))
|
2007-06-26 03:35:12 +04:00
|
|
|
return EINVAL;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
error = copyin(newp, &sa, newlen);
|
|
|
|
if (error)
|
2007-06-26 03:35:12 +04:00
|
|
|
return error;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* requested families must match
|
|
|
|
*/
|
|
|
|
if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family)
|
2007-06-26 03:35:12 +04:00
|
|
|
return EINVAL;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
|
|
|
|
switch (pf) {
|
2007-06-29 01:11:12 +04:00
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
si6[0] = (struct sockaddr_in6*)&sa[0];
|
|
|
|
si6[1] = (struct sockaddr_in6*)&sa[1];
|
|
|
|
if (si6[0]->sin6_len != sizeof(*si6[0]) ||
|
|
|
|
si6[1]->sin6_len != sizeof(*si6[1]))
|
|
|
|
return EINVAL;
|
|
|
|
|
|
|
|
if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) &&
|
|
|
|
!IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) {
|
|
|
|
error = sa6_embedscope(si6[0], ip6_use_defzone);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
error = sa6_embedscope(si6[1], ip6_use_defzone);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_enter(softnet_lock);
|
2007-06-29 01:11:12 +04:00
|
|
|
error = inet6_ident_core(&si6[0]->sin6_addr,
|
|
|
|
si6[0]->sin6_port, &si6[1]->sin6_addr,
|
|
|
|
si6[1]->sin6_port, oldp, oldlenp, l, dodrop);
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_exit(softnet_lock);
|
2007-06-29 01:11:12 +04:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) !=
|
|
|
|
IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr))
|
|
|
|
return EINVAL;
|
|
|
|
|
|
|
|
in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]);
|
|
|
|
in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]);
|
|
|
|
/*FALLTHROUGH*/
|
|
|
|
#endif /* INET6 */
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#ifdef INET
|
2007-06-26 03:35:12 +04:00
|
|
|
case PF_INET:
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
si4[0] = (struct sockaddr_in*)&sa[0];
|
|
|
|
si4[1] = (struct sockaddr_in*)&sa[1];
|
|
|
|
if (si4[0]->sin_len != sizeof(*si4[0]) ||
|
2007-06-29 01:11:12 +04:00
|
|
|
si4[0]->sin_len != sizeof(*si4[1]))
|
2007-06-26 03:35:12 +04:00
|
|
|
return EINVAL;
|
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_enter(softnet_lock);
|
2007-06-26 03:35:12 +04:00
|
|
|
error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port,
|
|
|
|
si4[1]->sin_addr, si4[1]->sin_port,
|
|
|
|
oldp, oldlenp, l, dodrop);
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_exit(softnet_lock);
|
2007-06-26 03:35:12 +04:00
|
|
|
return error;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#endif /* INET */
|
2007-06-26 03:35:12 +04:00
|
|
|
default:
|
|
|
|
return EPROTONOSUPPORT;
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-03-09 08:07:19 +03:00
|
|
|
/*
|
|
|
|
* sysctl helper for the inet and inet6 pcblists. handles tcp/udp and
|
|
|
|
* inet/inet6, as well as raw pcbs for each. specifically not
|
|
|
|
* declared static so that raw sockets and udp/udp6 can use it as
|
|
|
|
* well.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
sysctl_inpcblist(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
#ifdef INET
|
|
|
|
struct sockaddr_in *in;
|
2005-05-30 01:41:23 +04:00
|
|
|
const struct inpcb *inp;
|
2005-03-09 08:07:19 +03:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
struct sockaddr_in6 *in6;
|
2005-05-30 01:41:23 +04:00
|
|
|
const struct in6pcb *in6p;
|
2005-03-09 08:07:19 +03:00
|
|
|
#endif
|
2005-05-30 01:41:23 +04:00
|
|
|
struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data);
|
|
|
|
const struct inpcb_hdr *inph;
|
2005-03-09 08:07:19 +03:00
|
|
|
struct tcpcb *tp;
|
|
|
|
struct kinfo_pcb pcb;
|
|
|
|
char *dp;
|
|
|
|
size_t len, needed, elem_size, out_size;
|
|
|
|
int error, elem_count, pf, proto, pf2;
|
|
|
|
|
|
|
|
if (namelen != 4)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2006-04-15 03:09:16 +04:00
|
|
|
if (oldp != NULL) {
|
|
|
|
len = *oldlenp;
|
|
|
|
elem_size = name[2];
|
|
|
|
elem_count = name[3];
|
|
|
|
if (elem_size != sizeof(pcb))
|
|
|
|
return EINVAL;
|
|
|
|
} else {
|
|
|
|
len = 0;
|
|
|
|
elem_count = INT_MAX;
|
|
|
|
elem_size = sizeof(pcb);
|
|
|
|
}
|
2005-03-09 08:07:19 +03:00
|
|
|
error = 0;
|
|
|
|
dp = oldp;
|
2006-04-15 03:09:16 +04:00
|
|
|
out_size = elem_size;
|
2005-03-09 08:07:19 +03:00
|
|
|
needed = 0;
|
|
|
|
|
|
|
|
if (namelen == 1 && name[0] == CTL_QUERY)
|
2005-06-09 06:19:59 +04:00
|
|
|
return (sysctl_query(SYSCTLFN_CALL(rnode)));
|
2005-03-09 08:07:19 +03:00
|
|
|
|
|
|
|
if (name - oname != 4)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
pf = oname[1];
|
|
|
|
proto = oname[2];
|
2006-04-15 04:29:25 +04:00
|
|
|
pf2 = (oldp != NULL) ? pf : 0;
|
2005-03-09 08:07:19 +03:00
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_enter(softnet_lock);
|
|
|
|
|
2013-11-23 18:20:21 +04:00
|
|
|
TAILQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) {
|
2005-03-10 08:49:14 +03:00
|
|
|
#ifdef INET
|
2005-05-30 01:41:23 +04:00
|
|
|
inp = (const struct inpcb *)inph;
|
2005-03-10 08:49:14 +03:00
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
2005-05-30 01:41:23 +04:00
|
|
|
in6p = (const struct in6pcb *)inph;
|
2005-03-10 08:49:14 +03:00
|
|
|
#endif
|
2005-03-09 08:07:19 +03:00
|
|
|
|
2005-03-10 08:49:14 +03:00
|
|
|
if (inph->inph_af != pf)
|
2005-03-09 08:07:19 +03:00
|
|
|
continue;
|
|
|
|
|
2006-10-13 19:39:18 +04:00
|
|
|
if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET,
|
|
|
|
KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL,
|
|
|
|
NULL) != 0)
|
2005-09-07 21:58:13 +04:00
|
|
|
continue;
|
|
|
|
|
2005-03-09 08:07:19 +03:00
|
|
|
memset(&pcb, 0, sizeof(pcb));
|
|
|
|
|
|
|
|
pcb.ki_family = pf;
|
|
|
|
pcb.ki_type = proto;
|
|
|
|
|
|
|
|
switch (pf2) {
|
|
|
|
case 0:
|
|
|
|
/* just probing for size */
|
|
|
|
break;
|
|
|
|
#ifdef INET
|
|
|
|
case PF_INET:
|
|
|
|
pcb.ki_family = inp->inp_socket->so_proto->
|
|
|
|
pr_domain->dom_family;
|
|
|
|
pcb.ki_type = inp->inp_socket->so_proto->
|
|
|
|
pr_type;
|
|
|
|
pcb.ki_protocol = inp->inp_socket->so_proto->
|
|
|
|
pr_protocol;
|
|
|
|
pcb.ki_pflags = inp->inp_flags;
|
|
|
|
|
|
|
|
pcb.ki_sostate = inp->inp_socket->so_state;
|
|
|
|
pcb.ki_prstate = inp->inp_state;
|
|
|
|
if (proto == IPPROTO_TCP) {
|
|
|
|
tp = intotcpcb(inp);
|
|
|
|
pcb.ki_tstate = tp->t_state;
|
|
|
|
pcb.ki_tflags = tp->t_flags;
|
|
|
|
}
|
|
|
|
|
|
|
|
pcb.ki_pcbaddr = PTRTOUINT64(inp);
|
|
|
|
pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb);
|
|
|
|
pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket);
|
|
|
|
|
|
|
|
pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc;
|
|
|
|
pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc;
|
|
|
|
|
|
|
|
in = satosin(&pcb.ki_src);
|
|
|
|
in->sin_len = sizeof(*in);
|
|
|
|
in->sin_family = pf;
|
|
|
|
in->sin_port = inp->inp_lport;
|
|
|
|
in->sin_addr = inp->inp_laddr;
|
|
|
|
if (pcb.ki_prstate >= INP_CONNECTED) {
|
|
|
|
in = satosin(&pcb.ki_dst);
|
|
|
|
in->sin_len = sizeof(*in);
|
|
|
|
in->sin_family = pf;
|
|
|
|
in->sin_port = inp->inp_fport;
|
|
|
|
in->sin_addr = inp->inp_faddr;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
#ifdef INET6
|
|
|
|
case PF_INET6:
|
|
|
|
pcb.ki_family = in6p->in6p_socket->so_proto->
|
|
|
|
pr_domain->dom_family;
|
|
|
|
pcb.ki_type = in6p->in6p_socket->so_proto->pr_type;
|
|
|
|
pcb.ki_protocol = in6p->in6p_socket->so_proto->
|
|
|
|
pr_protocol;
|
|
|
|
pcb.ki_pflags = in6p->in6p_flags;
|
|
|
|
|
|
|
|
pcb.ki_sostate = in6p->in6p_socket->so_state;
|
|
|
|
pcb.ki_prstate = in6p->in6p_state;
|
|
|
|
if (proto == IPPROTO_TCP) {
|
|
|
|
tp = in6totcpcb(in6p);
|
|
|
|
pcb.ki_tstate = tp->t_state;
|
|
|
|
pcb.ki_tflags = tp->t_flags;
|
|
|
|
}
|
|
|
|
|
|
|
|
pcb.ki_pcbaddr = PTRTOUINT64(in6p);
|
|
|
|
pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb);
|
|
|
|
pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket);
|
|
|
|
|
|
|
|
pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc;
|
|
|
|
pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc;
|
|
|
|
|
|
|
|
in6 = satosin6(&pcb.ki_src);
|
|
|
|
in6->sin6_len = sizeof(*in6);
|
|
|
|
in6->sin6_family = pf;
|
|
|
|
in6->sin6_port = in6p->in6p_lport;
|
|
|
|
in6->sin6_flowinfo = in6p->in6p_flowinfo;
|
|
|
|
in6->sin6_addr = in6p->in6p_laddr;
|
|
|
|
in6->sin6_scope_id = 0; /* XXX? */
|
|
|
|
|
|
|
|
if (pcb.ki_prstate >= IN6P_CONNECTED) {
|
|
|
|
in6 = satosin6(&pcb.ki_dst);
|
|
|
|
in6->sin6_len = sizeof(*in6);
|
|
|
|
in6->sin6_family = pf;
|
|
|
|
in6->sin6_port = in6p->in6p_fport;
|
|
|
|
in6->sin6_flowinfo = in6p->in6p_flowinfo;
|
|
|
|
in6->sin6_addr = in6p->in6p_faddr;
|
|
|
|
in6->sin6_scope_id = 0; /* XXX? */
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
if (len >= elem_size && elem_count > 0) {
|
|
|
|
error = copyout(&pcb, dp, out_size);
|
2009-06-07 20:20:29 +04:00
|
|
|
if (error) {
|
|
|
|
mutex_exit(softnet_lock);
|
2005-03-09 08:07:19 +03:00
|
|
|
return (error);
|
2009-06-07 20:20:29 +04:00
|
|
|
}
|
2005-03-09 08:07:19 +03:00
|
|
|
dp += elem_size;
|
|
|
|
len -= elem_size;
|
|
|
|
}
|
2009-03-11 08:55:22 +03:00
|
|
|
needed += elem_size;
|
|
|
|
if (elem_count > 0 && elem_count != INT_MAX)
|
|
|
|
elem_count--;
|
2005-03-09 08:07:19 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
*oldlenp = needed;
|
|
|
|
if (oldp == NULL)
|
|
|
|
*oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb);
|
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_exit(softnet_lock);
|
|
|
|
|
2005-03-09 08:07:19 +03:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2006-10-09 20:27:07 +04:00
|
|
|
static int
|
|
|
|
sysctl_tcp_congctl(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
struct sysctlnode node;
|
2008-08-20 22:35:20 +04:00
|
|
|
int error;
|
2006-10-09 20:27:07 +04:00
|
|
|
char newname[TCPCC_MAXLEN];
|
|
|
|
|
|
|
|
strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1);
|
|
|
|
|
|
|
|
node = *rnode;
|
|
|
|
node.sysctl_data = newname;
|
|
|
|
node.sysctl_size = sizeof(newname);
|
|
|
|
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
|
|
|
|
if (error ||
|
|
|
|
newp == NULL ||
|
|
|
|
strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0)
|
|
|
|
return error;
|
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_enter(softnet_lock);
|
|
|
|
error = tcp_congctl_select(NULL, newname);
|
|
|
|
mutex_exit(softnet_lock);
|
|
|
|
|
2006-10-09 20:27:07 +04:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2013-04-10 04:16:03 +04:00
|
|
|
static int
|
|
|
|
sysctl_tcp_init_win(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
u_int iw;
|
|
|
|
struct sysctlnode node;
|
|
|
|
|
|
|
|
iw = *(u_int *)rnode->sysctl_data;
|
|
|
|
node = *rnode;
|
|
|
|
node.sysctl_data = &iw;
|
|
|
|
node.sysctl_size = sizeof(iw);
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
if (error || newp == NULL)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
if (iw >= __arraycount(tcp_init_win_max))
|
|
|
|
return EINVAL;
|
|
|
|
*(u_int *)rnode->sysctl_data = iw;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-06-20 19:29:17 +04:00
|
|
|
static int
|
|
|
|
sysctl_tcp_keep(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
u_int tmp;
|
|
|
|
struct sysctlnode node;
|
|
|
|
|
|
|
|
node = *rnode;
|
|
|
|
tmp = *(u_int *)rnode->sysctl_data;
|
|
|
|
node.sysctl_data = &tmp;
|
|
|
|
|
|
|
|
error = sysctl_lookup(SYSCTLFN_CALL(&node));
|
|
|
|
if (error || newp == NULL)
|
|
|
|
return error;
|
|
|
|
|
2008-08-20 22:35:20 +04:00
|
|
|
mutex_enter(softnet_lock);
|
|
|
|
|
2007-06-20 19:29:17 +04:00
|
|
|
*(u_int *)rnode->sysctl_data = tmp;
|
|
|
|
tcp_tcpcb_template(); /* update the template */
|
2008-08-20 22:35:20 +04:00
|
|
|
|
|
|
|
mutex_exit(softnet_lock);
|
2007-06-20 19:29:17 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-04-12 09:58:22 +04:00
|
|
|
static int
|
|
|
|
sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS)
|
|
|
|
{
|
|
|
|
|
2008-05-04 11:22:14 +04:00
|
|
|
return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS));
|
2008-04-12 09:58:22 +04:00
|
|
|
}
|
2007-06-20 19:29:17 +04:00
|
|
|
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
/*
|
|
|
|
* this (second stage) setup routine is a replacement for tcp_sysctl()
|
|
|
|
* (which is currently used for ipv4 and ipv6)
|
|
|
|
*/
|
|
|
|
static void
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname,
|
|
|
|
const char *tcpname)
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
{
|
2006-10-19 18:14:34 +04:00
|
|
|
const struct sysctlnode *sack_node;
|
2006-10-19 15:40:51 +04:00
|
|
|
const struct sysctlnode *abc_node;
|
2006-10-19 18:14:34 +04:00
|
|
|
const struct sysctlnode *ecn_node;
|
|
|
|
const struct sysctlnode *congctl_node;
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetimes Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable come from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
const struct sysctlnode *mslt_node;
|
|
|
|
const struct sysctlnode *vtw_node;
|
2005-09-06 06:41:14 +04:00
|
|
|
#ifdef TCP_DEBUG
|
|
|
|
extern struct tcp_debug tcp_debug[TCP_NDEBUG];
|
|
|
|
extern int tcp_debx;
|
|
|
|
#endif
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTLTYPE_NODE, pfname, NULL,
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_NODE, tcpname,
|
|
|
|
SYSCTL_DESCR("TCP related settings"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_EOL);
|
|
|
|
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "rfc1323",
|
|
|
|
SYSCTL_DESCR("Enable RFC1323 TCP extensions"),
|
2013-12-02 13:39:54 +04:00
|
|
|
sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "sendspace",
|
|
|
|
SYSCTL_DESCR("Default TCP send buffer size"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_sendspace, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "recvspace",
|
|
|
|
SYSCTL_DESCR("Default TCP receive buffer size"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_recvspace, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "mssdflt",
|
|
|
|
SYSCTL_DESCR("Default maximum segment size"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL);
|
2007-08-02 06:42:40 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "minmss",
|
|
|
|
SYSCTL_DESCR("Lower limit for TCP maximum segment size"),
|
|
|
|
NULL, 0, &tcp_minmss, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
2009-09-10 02:41:28 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "msl",
|
|
|
|
SYSCTL_DESCR("Maximum Segment Life"),
|
|
|
|
NULL, 0, &tcp_msl, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "syn_cache_limit",
|
|
|
|
SYSCTL_DESCR("Maximum number of entries in the TCP "
|
|
|
|
"compressed state engine"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_syn_cache_limit, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT,
|
|
|
|
CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "syn_bucket_limit",
|
|
|
|
SYSCTL_DESCR("Maximum number of entries per hash "
|
|
|
|
"bucket in the TCP compressed state "
|
|
|
|
"engine"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_syn_bucket_limit, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT,
|
|
|
|
CTL_EOL);
|
|
|
|
#if 0 /* obsoleted */
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "syn_cache_interval",
|
|
|
|
SYSCTL_DESCR("TCP compressed state engine's timer interval"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_syn_cache_interval, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER,
|
|
|
|
CTL_EOL);
|
|
|
|
#endif
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "init_win",
|
|
|
|
SYSCTL_DESCR("Initial TCP congestion window"),
|
2013-04-10 04:16:03 +04:00
|
|
|
sysctl_tcp_init_win, 0, &tcp_init_win, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "mss_ifmtu",
|
|
|
|
SYSCTL_DESCR("Use interface MTU for calculating MSS"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_mss_ifmtu, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL);
|
2005-04-05 05:07:17 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, &sack_node,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "sack",
|
|
|
|
SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"),
|
|
|
|
NULL, 0, NULL, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL);
|
2006-10-19 18:14:34 +04:00
|
|
|
|
|
|
|
/* Congctl subtree */
|
|
|
|
sysctl_createv(clog, 0, NULL, &congctl_node,
|
2006-10-09 20:27:07 +04:00
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "congctl",
|
|
|
|
SYSCTL_DESCR("TCP Congestion Control"),
|
2006-10-19 18:14:34 +04:00
|
|
|
NULL, 0, NULL, 0,
|
2006-10-09 20:27:07 +04:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
2006-10-19 18:14:34 +04:00
|
|
|
sysctl_createv(clog, 0, &congctl_node, NULL,
|
2006-10-09 20:27:07 +04:00
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_STRING, "available",
|
|
|
|
SYSCTL_DESCR("Available Congestion Control Mechanisms"),
|
2012-06-03 01:36:41 +04:00
|
|
|
NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL);
|
2006-10-19 18:14:34 +04:00
|
|
|
sysctl_createv(clog, 0, &congctl_node, NULL,
|
2006-10-09 20:27:07 +04:00
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_STRING, "selected",
|
|
|
|
SYSCTL_DESCR("Selected Congestion Control Mechanism"),
|
|
|
|
sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN,
|
|
|
|
CTL_CREATE, CTL_EOL);
|
|
|
|
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "win_scale",
|
|
|
|
SYSCTL_DESCR("Use RFC1323 window scale options"),
|
2013-12-02 13:39:54 +04:00
|
|
|
sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "timestamps",
|
|
|
|
SYSCTL_DESCR("Use RFC1323 time stamp options"),
|
2013-12-02 13:39:54 +04:00
|
|
|
sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "compat_42",
|
|
|
|
SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_compat_42, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "cwm",
|
|
|
|
SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window "
|
|
|
|
"Monitoring"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_cwm, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "cwm_burstsize",
|
|
|
|
SYSCTL_DESCR("Congestion Window Monitoring allowed "
|
|
|
|
"burst count in packets"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_cwm_burstsize, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE,
|
|
|
|
CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "ack_on_push",
|
|
|
|
SYSCTL_DESCR("Immediately return ACK when PSH is "
|
|
|
|
"received"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_ack_on_push, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "keepidle",
|
|
|
|
SYSCTL_DESCR("Allowed connection idle ticks before a "
|
|
|
|
"keepalive probe is sent"),
|
2007-06-20 19:29:17 +04:00
|
|
|
sysctl_tcp_keep, 0, &tcp_keepidle, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "keepintvl",
|
|
|
|
SYSCTL_DESCR("Ticks before next keepalive probe is sent"),
|
2007-06-20 19:29:17 +04:00
|
|
|
sysctl_tcp_keep, 0, &tcp_keepintvl, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "keepcnt",
|
|
|
|
SYSCTL_DESCR("Number of keepalive probes to send"),
|
2007-06-20 19:29:17 +04:00
|
|
|
sysctl_tcp_keep, 0, &tcp_keepcnt, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "slowhz",
|
|
|
|
SYSCTL_DESCR("Keepalive ticks per second"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, PR_SLOWHZ, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "log_refused",
|
|
|
|
SYSCTL_DESCR("Log refused TCP connections"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_log_refused, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL);
|
|
|
|
#if 0 /* obsoleted */
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTLTYPE_INT, "rstratelimit", NULL,
|
|
|
|
NULL, 0, &tcp_rst_ratelim, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL);
|
|
|
|
#endif
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "rstppslimit",
|
|
|
|
SYSCTL_DESCR("Maximum number of RST packets to send "
|
|
|
|
"per second"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_rst_ppslim, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "delack_ticks",
|
|
|
|
SYSCTL_DESCR("Number of ticks to delay sending an ACK"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
NULL, 0, &tcp_delack_ticks, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_INT, "init_win_local",
|
|
|
|
SYSCTL_DESCR("Initial TCP window size (in segments)"),
|
2013-04-10 04:16:03 +04:00
|
|
|
sysctl_tcp_init_win, 0, &tcp_init_win_local, 0,
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL,
|
|
|
|
CTL_EOL);
|
2004-03-24 18:34:46 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
2004-05-25 08:33:59 +04:00
|
|
|
CTLTYPE_STRUCT, "ident",
|
|
|
|
SYSCTL_DESCR("RFC1413 Identification Protocol lookups"),
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t),
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL);
|
2004-12-15 07:25:19 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "do_loopback_cksum",
|
|
|
|
SYSCTL_DESCR("Perform TCP checksum on loopback"),
|
|
|
|
NULL, 0, &tcp_do_loopback_cksum, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM,
|
|
|
|
CTL_EOL);
|
2005-03-09 08:07:19 +03:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
2005-03-11 09:16:15 +03:00
|
|
|
CTLTYPE_STRUCT, "pcblist",
|
2005-03-09 08:07:19 +03:00
|
|
|
SYSCTL_DESCR("TCP protocol control block list"),
|
|
|
|
sysctl_inpcblist, 0, &tcbtable, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
|
|
|
|
CTL_EOL);
|
2007-06-20 19:29:17 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "keepinit",
|
|
|
|
SYSCTL_DESCR("Ticks before initial tcp connection times out"),
|
|
|
|
sysctl_tcp_keep, 0, &tcp_keepinit, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
2005-04-05 05:07:17 +04:00
|
|
|
|
2007-08-02 06:42:40 +04:00
|
|
|
/* TCP socket buffers auto-sizing nodes */
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "recvbuf_auto",
|
|
|
|
SYSCTL_DESCR("Enable automatic receive "
|
|
|
|
"buffer sizing (experimental)"),
|
|
|
|
NULL, 0, &tcp_do_autorcvbuf, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "recvbuf_inc",
|
|
|
|
SYSCTL_DESCR("Incrementor step size of "
|
|
|
|
"automatic receive buffer"),
|
|
|
|
NULL, 0, &tcp_autorcvbuf_inc, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "recvbuf_max",
|
|
|
|
SYSCTL_DESCR("Max size of automatic receive buffer"),
|
|
|
|
NULL, 0, &tcp_autorcvbuf_max, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "sendbuf_auto",
|
|
|
|
SYSCTL_DESCR("Enable automatic send "
|
|
|
|
"buffer sizing (experimental)"),
|
|
|
|
NULL, 0, &tcp_do_autosndbuf, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "sendbuf_inc",
|
|
|
|
SYSCTL_DESCR("Incrementor step size of "
|
|
|
|
"automatic send buffer"),
|
|
|
|
NULL, 0, &tcp_autosndbuf_inc, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "sendbuf_max",
|
|
|
|
SYSCTL_DESCR("Max size of automatic send buffer"),
|
|
|
|
NULL, 0, &tcp_autosndbuf_max, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
|
2006-10-19 18:14:34 +04:00
|
|
|
/* ECN subtree */
|
|
|
|
sysctl_createv(clog, 0, NULL, &ecn_node,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_NODE, "ecn",
|
|
|
|
SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"),
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &ecn_node, NULL,
|
2006-09-05 04:29:35 +04:00
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "enable",
|
|
|
|
SYSCTL_DESCR("Enable TCP Explicit Congestion "
|
|
|
|
"Notification"),
|
2006-10-19 18:14:34 +04:00
|
|
|
NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &ecn_node, NULL,
|
2006-09-05 04:29:35 +04:00
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "maxretries",
|
|
|
|
SYSCTL_DESCR("Number of times to retry ECN setup "
|
|
|
|
"before disabling ECN on the connection"),
|
2006-10-19 18:14:34 +04:00
|
|
|
NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
|
2014-10-18 12:33:23 +04:00
|
|
|
/* SACK gets its own little subtree. */
|
2005-04-05 05:07:17 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, &sack_node,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "enable",
|
|
|
|
SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"),
|
|
|
|
NULL, 0, &tcp_do_sack, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, &sack_node,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "maxholes",
|
|
|
|
SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"),
|
|
|
|
NULL, 0, &tcp_sack_tp_maxholes, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, &sack_node,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "globalmaxholes",
|
|
|
|
SYSCTL_DESCR("Global maximum number of TCP SACK holes"),
|
|
|
|
NULL, 0, &tcp_sack_globalmaxholes, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, &sack_node,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_INT, "globalholes",
|
|
|
|
SYSCTL_DESCR("Global number of TCP SACK holes"),
|
|
|
|
NULL, 0, &tcp_sack_globalholes, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL);
|
2005-08-05 13:21:25 +04:00
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_STRUCT, "stats",
|
|
|
|
SYSCTL_DESCR("TCP statistics"),
|
2008-04-12 09:58:22 +04:00
|
|
|
sysctl_net_inet_tcp_stats, 0, NULL, 0,
|
2005-08-05 13:21:25 +04:00
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS,
|
|
|
|
CTL_EOL);
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable comes from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "local_by_rtt",
|
|
|
|
SYSCTL_DESCR("Use RTT estimator to decide which hosts "
|
|
|
|
"are local"),
|
|
|
|
NULL, 0, &tcp_rttlocal, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
2005-09-06 06:41:14 +04:00
|
|
|
#ifdef TCP_DEBUG
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_STRUCT, "debug",
|
|
|
|
SYSCTL_DESCR("TCP sockets debug information"),
|
|
|
|
NULL, 0, &tcp_debug, sizeof(tcp_debug),
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG,
|
|
|
|
CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT,
|
|
|
|
CTLTYPE_INT, "debx",
|
2005-09-06 06:57:04 +04:00
|
|
|
SYSCTL_DESCR("Number of TCP debug sockets messages"),
|
2005-09-06 06:41:14 +04:00
|
|
|
NULL, 0, &tcp_debx, sizeof(tcp_debx),
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX,
|
|
|
|
CTL_EOL);
|
|
|
|
#endif
|
2007-06-26 03:35:12 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_STRUCT, "drop",
|
|
|
|
SYSCTL_DESCR("TCP drop connection"),
|
|
|
|
sysctl_net_inet_tcp_drop, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL);
|
2006-10-16 22:13:56 +04:00
|
|
|
sysctl_createv(clog, 0, NULL, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "iss_hash",
|
|
|
|
SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic "
|
|
|
|
"hash computation"),
|
|
|
|
NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948),
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
|
|
|
|
CTL_EOL);
|
2005-09-06 06:41:14 +04:00
|
|
|
|
2006-10-19 15:40:51 +04:00
|
|
|
/* ABC subtree */
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, &abc_node,
|
|
|
|
CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc",
|
|
|
|
SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"),
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &abc_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "enable",
|
|
|
|
SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"),
|
|
|
|
NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &abc_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "aggressive",
|
|
|
|
SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"),
|
|
|
|
NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL);
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable comes from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
|
|
|
|
/* MSL tuning subtree */
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, &mslt_node,
|
|
|
|
CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt",
|
|
|
|
SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"),
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &mslt_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "enable",
|
|
|
|
SYSCTL_DESCR("Enable TIME_WAIT truncation"),
|
|
|
|
NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &mslt_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "loopback",
|
|
|
|
SYSCTL_DESCR("MSL value to use for loopback connections"),
|
|
|
|
NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &mslt_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "local",
|
|
|
|
SYSCTL_DESCR("MSL value to use for local connections"),
|
|
|
|
NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &mslt_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "remote",
|
|
|
|
SYSCTL_DESCR("MSL value to use for remote connections"),
|
|
|
|
NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &mslt_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "remote_threshold",
|
|
|
|
SYSCTL_DESCR("RTT estimate value to promote local to remote"),
|
|
|
|
NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL);
|
|
|
|
|
|
|
|
/* vestigial TIME_WAIT tuning subtree */
|
|
|
|
|
|
|
|
sysctl_createv(clog, 0, NULL, &vtw_node,
|
|
|
|
CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw",
|
|
|
|
SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"),
|
|
|
|
NULL, 0, NULL, 0,
|
|
|
|
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &vtw_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
|
|
|
|
CTLTYPE_INT, "enable",
|
|
|
|
SYSCTL_DESCR("Enable Vestigial TIME_WAIT"),
|
2011-06-06 23:15:43 +04:00
|
|
|
sysctl_tcp_vtw_enable, 0,
|
Reduces the resources demanded by TCP sessions in TIME_WAIT-state using
methods called Vestigial Time-Wait (VTW) and Maximum Segment Lifetime
Truncation (MSLT).
MSLT and VTW were contributed by Coyote Point Systems, Inc.
Even after a TCP session enters the TIME_WAIT state, its corresponding
socket and protocol control blocks (PCBs) stick around until the TCP
Maximum Segment Lifetime (MSL) expires. On a host whose workload
necessarily creates and closes down many TCP sockets, the sockets & PCBs
for TCP sessions in TIME_WAIT state amount to many megabytes of dead
weight in RAM.
Maximum Segment Lifetime Truncation (MSLT) assigns each TCP session to
a class based on the nearness of the peer. Corresponding to each class
is an MSL, and a session uses the MSL of its class. The classes are
loopback (local host equals remote host), local (local host and remote
host are on the same link/subnet), and remote (local host and remote
host communicate via one or more gateways). Classes corresponding to
nearer peers have lower MSLs by default: 2 seconds for loopback, 10
seconds for local, 60 seconds for remote. Loopback and local sessions
expire more quickly when MSLT is used.
Vestigial Time-Wait (VTW) replaces a TIME_WAIT session's PCB/socket
dead weight with a compact representation of the session, called a
"vestigial PCB". VTW data structures are designed to be very fast and
memory-efficient: for fast insertion and lookup of vestigial PCBs,
the PCBs are stored in a hash table that is designed to minimize the
number of cacheline visits per lookup/insertion. The memory both
for vestigial PCBs and for elements of the PCB hashtable comes from
fixed-size pools, and linked data structures exploit this to conserve
memory by representing references with a narrow index/offset from the
start of a pool instead of a pointer. When space for new vestigial PCBs
runs out, VTW makes room by discarding old vestigial PCBs, oldest first.
VTW cooperates with MSLT.
It may help to think of VTW as a "FIN cache" by analogy to the SYN
cache.
A 2.8-GHz Pentium 4 running a test workload that creates TIME_WAIT
sessions as fast as it can is approximately 17% idle when VTW is active
versus 0% idle when VTW is inactive. It has 103 megabytes more free RAM
when VTW is active (approximately 64k vestigial PCBs are created) than
when it is inactive.
2011-05-03 22:28:44 +04:00
|
|
|
(pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable,
|
|
|
|
0, CTL_CREATE, CTL_EOL);
|
|
|
|
sysctl_createv(clog, 0, &vtw_node, NULL,
|
|
|
|
CTLFLAG_PERMANENT|CTLFLAG_READONLY,
|
|
|
|
CTLTYPE_INT, "entries",
|
|
|
|
SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"),
|
|
|
|
NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL);
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
}
|
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
/*
 * tcp_usrreq_init:
 *
 *	Calls sysctl_net_inet_tcp_setup2() once for each address family
 *	selected at compile time:
 *	  - when INET is defined:  (NULL, PF_INET,  "inet",  "tcp")
 *	  - when INET6 is defined: (NULL, PF_INET6, "inet6", "tcp6")
 *	The first argument is NULL in both calls.  When neither INET nor
 *	INET6 is defined the function body is empty.
 *
 *	Takes no arguments and returns nothing.
 *
 *	NOTE(review): presumably the helper registers the per-family TCP
 *	sysctl nodes and the NULL is its sysctl log argument -- confirm
 *	against the helper's definition, which is not part of this span.
 */
void
|
|
|
|
tcp_usrreq_init(void)
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
{
|
|
|
|
|
2009-09-16 19:23:04 +04:00
|
|
|
#ifdef INET
|
|
|
|
sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp");
|
|
|
|
#endif
|
Dynamic sysctl.
Gone are the old kern_sysctl(), cpu_sysctl(), hw_sysctl(),
vfs_sysctl(), etc, routines, along with sysctl_int() et al. Now all
nodes are registered with the tree, and nodes can be added (or
removed) easily, and I/O to and from the tree is handled generically.
Since the nodes are registered with the tree, the mapping from name to
number (and back again) can now be discovered, instead of having to be
hard coded. Adding new nodes to the tree is likewise much simpler --
the new infrastructure handles almost all the work for simple types,
and just about anything else can be done with a small helper function.
All existing nodes are where they were before (numerically speaking),
so all existing consumers of sysctl information should notice no
difference.
PS - I'm sorry, but there's a distinct lack of documentation at the
moment. I'm working on sysctl(3/8/9) right now, and I promise to
watch out for buses.
2003-12-04 22:38:21 +03:00
|
|
|
#ifdef INET6
|
2009-09-16 19:23:04 +04:00
|
|
|
sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6");
|
|
|
|
#endif
|
2003-04-20 00:58:35 +04:00
|
|
|
}
|
2014-05-18 18:46:15 +04:00
|
|
|
|
2014-05-20 23:04:00 +04:00
|
|
|
/*
 * User-request wrapper redirection.
 *
 * PR_WRAP_USRREQS(tcp) is invoked first, then every tcp_<request> name
 * used by the tcp_usrreqs table that follows is redefined to the
 * matching tcp_<request>_wrapper name.  Because the table initializer
 * appears after these definitions, each .pr_<request> member written as
 * "tcp_<request>" is expanded by the preprocessor to the corresponding
 * tcp_<request>_wrapper symbol.
 *
 * NOTE(review): PR_WRAP_USRREQS() is defined outside this file section;
 * presumably it emits the tcp_<request>_wrapper functions referenced
 * here -- confirm against its definition.
 */
PR_WRAP_USRREQS(tcp)
|
|
|
|
#define tcp_attach tcp_attach_wrapper
|
|
|
|
#define tcp_detach tcp_detach_wrapper
|
2014-07-09 18:41:42 +04:00
|
|
|
#define tcp_accept tcp_accept_wrapper
|
2014-07-24 19:12:03 +04:00
|
|
|
#define tcp_bind tcp_bind_wrapper
|
|
|
|
#define tcp_listen tcp_listen_wrapper
|
2014-07-30 14:04:25 +04:00
|
|
|
#define tcp_connect tcp_connect_wrapper
|
2014-08-09 09:33:00 +04:00
|
|
|
#define tcp_connect2 tcp_connect2_wrapper
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
#define tcp_disconnect tcp_disconnect_wrapper
|
|
|
|
#define tcp_shutdown tcp_shutdown_wrapper
|
|
|
|
#define tcp_abort tcp_abort_wrapper
|
2014-06-22 12:10:18 +04:00
|
|
|
#define tcp_ioctl tcp_ioctl_wrapper
|
2014-07-06 07:33:33 +04:00
|
|
|
#define tcp_stat tcp_stat_wrapper
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
#define tcp_peeraddr tcp_peeraddr_wrapper
|
|
|
|
#define tcp_sockaddr tcp_sockaddr_wrapper
|
2014-08-08 07:05:44 +04:00
|
|
|
#define tcp_rcvd tcp_rcvd_wrapper
|
2014-07-23 17:17:18 +04:00
|
|
|
#define tcp_recvoob tcp_recvoob_wrapper
|
2014-08-05 11:55:31 +04:00
|
|
|
#define tcp_send tcp_send_wrapper
|
2014-07-23 17:17:18 +04:00
|
|
|
#define tcp_sendoob tcp_sendoob_wrapper
|
2014-08-09 09:33:00 +04:00
|
|
|
#define tcp_purgeif tcp_purgeif_wrapper
|
2014-05-18 18:46:15 +04:00
|
|
|
|
|
|
|
/*
 * Table of TCP's per-request handlers: every pr_* member is initialized
 * with the tcp_* identifier of the same name (attach, detach, accept,
 * bind, listen, connect, connect2, disconnect, shutdown, abort, ioctl,
 * stat, peeraddr, sockaddr, rcvd, recvoob, send, sendoob, purgeif).
 *
 * NOTE(review): tcp_peeraddr, tcp_sockaddr, tcp_rcvd, tcp_recvoob,
 * tcp_send, tcp_sendoob and tcp_purgeif are #defined to their *_wrapper
 * variants immediately before this table, so those slots receive the
 * wrappers.  The remaining names are presumably redirected the same way
 * earlier in the file -- confirm against the preceding #define run.
 */
const struct pr_usrreqs tcp_usrreqs = {
|
2014-05-19 06:51:24 +04:00
|
|
|
.pr_attach = tcp_attach,
|
|
|
|
.pr_detach = tcp_detach,
|
2014-07-09 18:41:42 +04:00
|
|
|
.pr_accept = tcp_accept,
|
2014-07-24 19:12:03 +04:00
|
|
|
.pr_bind = tcp_bind,
|
|
|
|
.pr_listen = tcp_listen,
|
2014-07-30 14:04:25 +04:00
|
|
|
.pr_connect = tcp_connect,
|
2014-08-09 09:33:00 +04:00
|
|
|
.pr_connect2 = tcp_connect2,
|
split PRU_DISCONNECT, PRU_SHUTDOWN and PRU_ABORT function out of
pr_generic() usrreq switches and put into separate functions
xxx_disconnect(struct socket *)
xxx_shutdown(struct socket *)
xxx_abort(struct socket *)
- always KASSERT(solocked(so)) even if not implemented
- replace calls to pr_generic() with req =
PRU_{DISCONNECT,SHUTDOWN,ABORT}
with calls to pr_{disconnect,shutdown,abort}() respectively
rename existing internal functions used to implement above functionality
to permit use of the names for xxx_{disconnect,shutdown,abort}().
- {l2cap,sco,rfcomm}_disconnect() ->
{l2cap,sco,rfcomm}_disconnect_pcb()
- {unp,rip,tcp}_disconnect() -> {unp,rip,tcp}_disconnect1()
- unp_shutdown() -> unp_shutdown1()
patch reviewed by rmind
2014-07-31 07:39:35 +04:00
|
|
|
.pr_disconnect = tcp_disconnect,
|
|
|
|
.pr_shutdown = tcp_shutdown,
|
|
|
|
.pr_abort = tcp_abort,
|
2014-06-22 12:10:18 +04:00
|
|
|
.pr_ioctl = tcp_ioctl,
|
2014-07-06 07:33:33 +04:00
|
|
|
.pr_stat = tcp_stat,
|
* split PRU_PEERADDR and PRU_SOCKADDR function out of pr_generic()
usrreq switches and put into separate functions
xxx_{peer,sock}addr(struct socket *, struct mbuf *).
- KASSERT(solocked(so)) always in new functions even if request
is not implemented
- KASSERT(pcb != NULL) and KASSERT(nam) if the request is
implemented and not for tcp.
* for tcp roll #ifdef KPROF and #ifdef DEBUG code from tcp_usrreq() into
easier to cut & paste functions tcp_debug_capture() and
tcp_debug_trace()
- functions provided by rmind
- remaining use of PRU_{PEER,SOCK}ADDR #define to be removed in a
future commit.
* rename netbt functions to permit consistency of pru function names
(as has been done with other requests already split out).
- l2cap_{peer,sock}addr() -> l2cap_{peer,sock}_addr_pcb()
- rfcomm_{peer,sock}addr() -> rfcomm_{peer,sock}_addr_pcb()
- sco_{peer,sock}addr() -> sco_{peer,sock}_addr_pcb()
* split/refactor do_sys_getsockname(lwp, fd, which, nam) into
two functions do_sys_get{peer,sock}name(fd, nam).
- move PRU_PEERADDR handling into do_sys_getpeername() from
do_sys_getsockname()
- have svr4_stream directly call do_sys_get{sock,peer}name()
respectively instead of providing `which' & fix a DPRINTF string
that incorrectly wrote "getpeername" when it meant "getsockname"
- fix sys_getpeername() and sys_getsockname() to call
do_sys_get{sock,peer}name() without `which' and `lwp' & adjust
comments
- bump kernel version for removal of lwp & which parameters from
do_sys_getsockname()
note: future cleanup to remove struct mbuf * abuse in
xxx_{peer,sock}name()
still to come, not done in this commit since it is easier to do post
split.
patch reviewed by rmind
welcome to 6.99.47
2014-07-09 08:54:03 +04:00
|
|
|
.pr_peeraddr = tcp_peeraddr,
|
|
|
|
.pr_sockaddr = tcp_sockaddr,
|
2014-08-08 07:05:44 +04:00
|
|
|
.pr_rcvd = tcp_rcvd,
|
2014-07-23 17:17:18 +04:00
|
|
|
.pr_recvoob = tcp_recvoob,
|
2014-08-05 11:55:31 +04:00
|
|
|
.pr_send = tcp_send,
|
2014-07-23 17:17:18 +04:00
|
|
|
.pr_sendoob = tcp_sendoob,
|
2014-08-09 09:33:00 +04:00
|
|
|
.pr_purgeif = tcp_purgeif,
|
2014-05-18 18:46:15 +04:00
|
|
|
};
|