Port over the TCP_INFO socket option from FreeBSD, originally from
the Linux 2.6 TCP API. This permits the caller to query certain information about a TCP connection, and is used by pkgsrc's net/iperf3 test program if available. This extends struct tcpcb with three fields to count retransmits, out-of-sequence receives and zero window announcements, and will therefore warrant a kernel revision bump (done separately).
This commit is contained in:
parent
cba38714c3
commit
1d14d02249
|
@ -1,4 +1,4 @@
|
|||
.\" $NetBSD: tcp.4,v 1.29 2013/10/10 12:28:10 christos Exp $
|
||||
.\" $NetBSD: tcp.4,v 1.30 2015/02/14 12:57:52 he Exp $
|
||||
.\" $FreeBSD: tcp.4,v 1.11.2.16 2004/02/16 22:21:47 bms Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 1983, 1991, 1993
|
||||
|
@ -243,6 +243,23 @@ option value is inherited from the listening socket.
|
|||
This option takes an
|
||||
.Vt "unsigned int"
|
||||
value, with a value greater than 0.
|
||||
.It Dv TCP_INFO
|
||||
Information about a socket's underlying TCP session may be retrieved
|
||||
by passing the read-only option
|
||||
.Dv TCP_INFO
|
||||
to
|
||||
.Xr getsockopt 2 .
|
||||
It accepts a single argument: a pointer to an instance of
|
||||
.Vt "struct tcp_info" .
|
||||
.Pp
|
||||
This API is subject to change; consult the source to determine
|
||||
which fields are currently filled out by this option.
|
||||
.Nx
|
||||
specific additions include
|
||||
send window size,
|
||||
receive window size,
|
||||
and
|
||||
bandwidth-controlled window space.
|
||||
.\" range of 0 to N (where N is the
|
||||
.\" .Xr sysctl 8
|
||||
.\" variable
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tcp.h,v 1.30 2012/01/07 20:20:22 christos Exp $ */
|
||||
/* $NetBSD: tcp.h,v 1.31 2015/02/14 12:57:53 he Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 1982, 1986, 1993
|
||||
|
@ -127,7 +127,80 @@ struct tcphdr {
|
|||
#ifdef notyet
|
||||
#define TCP_NOOPT 8 /* reserved for FreeBSD compat */
|
||||
#endif
|
||||
#define TCP_INFO 9 /* retrieve tcp_info structure */
|
||||
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
|
||||
#define TCP_CONGCTL 0x20 /* selected congestion control */
|
||||
|
||||
#define TCPI_OPT_TIMESTAMPS 0x01
|
||||
#define TCPI_OPT_SACK 0x02
|
||||
#define TCPI_OPT_WSCALE 0x04
|
||||
#define TCPI_OPT_ECN 0x08
|
||||
#define TCPI_OPT_TOE 0x10
|
||||
|
||||
/*
|
||||
* The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
|
||||
* the caller to query certain information about the state of a TCP
|
||||
* connection. We provide an overlapping set of fields with the Linux
|
||||
* implementation, but since this is a fixed size structure, room has been
|
||||
* left for growth. In order to maximize potential future compatibility with
|
||||
* the Linux API, the same variable names and order have been adopted, and
|
||||
* padding left to make room for omitted fields in case they are added later.
|
||||
*
|
||||
* XXX: This is currently an unstable ABI/API, in that it is expected to
|
||||
* change.
|
||||
*/
|
||||
struct tcp_info {
|
||||
uint8_t tcpi_state; /* TCP FSM state. */
|
||||
uint8_t __tcpi_ca_state;
|
||||
uint8_t __tcpi_retransmits;
|
||||
uint8_t __tcpi_probes;
|
||||
uint8_t __tcpi_backoff;
|
||||
uint8_t tcpi_options; /* Options enabled on conn. */
|
||||
uint8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */
|
||||
tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
|
||||
|
||||
uint32_t tcpi_rto; /* Retransmission timeout (usec). */
|
||||
uint32_t __tcpi_ato;
|
||||
uint32_t tcpi_snd_mss; /* Max segment size for send. */
|
||||
uint32_t tcpi_rcv_mss; /* Max segment size for receive. */
|
||||
|
||||
uint32_t __tcpi_unacked;
|
||||
uint32_t __tcpi_sacked;
|
||||
uint32_t __tcpi_lost;
|
||||
uint32_t __tcpi_retrans;
|
||||
uint32_t __tcpi_fackets;
|
||||
|
||||
/* Times; measurements in usecs. */
|
||||
uint32_t __tcpi_last_data_sent;
|
||||
uint32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */
|
||||
uint32_t tcpi_last_data_recv; /* Time since last recv data. */
|
||||
uint32_t __tcpi_last_ack_recv;
|
||||
|
||||
/* Metrics; variable units. */
|
||||
uint32_t __tcpi_pmtu;
|
||||
uint32_t __tcpi_rcv_ssthresh;
|
||||
uint32_t tcpi_rtt; /* Smoothed RTT in usecs. */
|
||||
uint32_t tcpi_rttvar; /* RTT variance in usecs. */
|
||||
uint32_t tcpi_snd_ssthresh; /* Slow start threshold. */
|
||||
uint32_t tcpi_snd_cwnd; /* Send congestion window. */
|
||||
uint32_t __tcpi_advmss;
|
||||
uint32_t __tcpi_reordering;
|
||||
|
||||
uint32_t __tcpi_rcv_rtt;
|
||||
uint32_t tcpi_rcv_space; /* Advertised recv window. */
|
||||
|
||||
/* FreeBSD/NetBSD extensions to tcp_info. */
|
||||
uint32_t tcpi_snd_wnd; /* Advertised send window. */
|
||||
uint32_t tcpi_snd_bwnd; /* No longer used. */
|
||||
uint32_t tcpi_snd_nxt; /* Next egress seqno */
|
||||
uint32_t tcpi_rcv_nxt; /* Next ingress seqno */
|
||||
uint32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
|
||||
uint32_t tcpi_snd_rexmitpack; /* Retransmitted packets */
|
||||
uint32_t tcpi_rcv_ooopack; /* Out-of-order packets */
|
||||
uint32_t tcpi_snd_zerowin; /* Zero-sized windows sent */
|
||||
|
||||
/* Padding to grow without breaking ABI. */
|
||||
uint32_t __tcpi_pad[26]; /* Padding. */
|
||||
};
|
||||
|
||||
#endif /* !_NETINET_TCP_H_ */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $ */
|
||||
/* $NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
||||
|
@ -148,7 +148,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $");
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_ipsec.h"
|
||||
|
@ -738,6 +738,7 @@ tcp_reass(struct tcpcb *tp, const struct tcphdr *th, struct mbuf *m, int *tlen)
|
|||
/*
|
||||
* Update the counters.
|
||||
*/
|
||||
tp->t_rcvoopack++;
|
||||
tcps = TCP_STAT_GETREF();
|
||||
tcps[TCP_STAT_RCVOOPACK]++;
|
||||
tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $ */
|
||||
/* $NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
||||
|
@ -135,7 +135,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $");
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_ipsec.h"
|
||||
|
@ -439,6 +439,7 @@ tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off,
|
|||
if (tp->t_force && len == 1)
|
||||
tcps[TCP_STAT_SNDPROBE]++;
|
||||
else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
|
||||
tp->t_sndrexmitpack++;
|
||||
tcps[TCP_STAT_SNDREXMITPACK]++;
|
||||
tcps[TCP_STAT_SNDREXMITBYTE] += len;
|
||||
} else {
|
||||
|
@ -1401,6 +1402,9 @@ send:
|
|||
if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
|
||||
win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
|
||||
th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
|
||||
if (th->th_win == 0) {
|
||||
tp->t_sndzerowin++;
|
||||
}
|
||||
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
|
||||
u_int32_t urp = tp->snd_up - tp->snd_nxt;
|
||||
if (urp > IP_MAXPACKET)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $ */
|
||||
/* $NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
||||
|
@ -91,7 +91,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $");
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_ipsec.h"
|
||||
|
@ -980,6 +980,9 @@ static struct tcpcb tcpcb_template = {
|
|||
|
||||
.t_partialacks = -1,
|
||||
.t_bytes_acked = 0,
|
||||
.t_sndrexmitpack = 0,
|
||||
.t_rcvoopack = 0,
|
||||
.t_sndzerowin = 0,
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $ */
|
||||
/* $NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
||||
|
@ -99,7 +99,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $");
|
||||
|
||||
#include "opt_inet.h"
|
||||
#include "opt_ipsec.h"
|
||||
|
@ -119,6 +119,7 @@ __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $"
|
|||
#include <sys/domain.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/kauth.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/uidinfo.h>
|
||||
|
||||
#include <net/if.h>
|
||||
|
@ -271,6 +272,65 @@ change_keepalive(struct socket *so, struct tcpcb *tp)
|
|||
TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
|
||||
}
|
||||
|
||||
/*
|
||||
* Export TCP internal state information via a struct tcp_info, based on the
|
||||
* Linux 2.6 API. Not ABI compatible as our constants are mapped differently
|
||||
* (TCP state machine, etc). We export all information using FreeBSD-native
|
||||
* constants -- for example, the numeric values for tcpi_state will differ
|
||||
* from Linux.
|
||||
*/
|
||||
static void
|
||||
tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
|
||||
{
|
||||
|
||||
bzero(ti, sizeof(*ti));
|
||||
|
||||
ti->tcpi_state = tp->t_state;
|
||||
if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
|
||||
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
|
||||
if (tp->t_flags & TF_SACK_PERMIT)
|
||||
ti->tcpi_options |= TCPI_OPT_SACK;
|
||||
if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
|
||||
ti->tcpi_options |= TCPI_OPT_WSCALE;
|
||||
ti->tcpi_snd_wscale = tp->snd_scale;
|
||||
ti->tcpi_rcv_wscale = tp->rcv_scale;
|
||||
}
|
||||
if (tp->t_flags & TF_ECN_PERMIT) {
|
||||
ti->tcpi_options |= TCPI_OPT_ECN;
|
||||
}
|
||||
|
||||
ti->tcpi_rto = tp->t_rxtcur * tick;
|
||||
ti->tcpi_last_data_recv = (long)(hardclock_ticks -
|
||||
(int)tp->t_rcvtime) * tick;
|
||||
ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
|
||||
ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
|
||||
|
||||
ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
|
||||
/* Linux API wants these in # of segments, apparently */
|
||||
ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz;
|
||||
ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz;
|
||||
|
||||
/*
|
||||
* FreeBSD-specific extension fields for tcp_info.
|
||||
*/
|
||||
ti->tcpi_rcv_space = tp->rcv_wnd;
|
||||
ti->tcpi_rcv_nxt = tp->rcv_nxt;
|
||||
ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
|
||||
ti->tcpi_snd_nxt = tp->snd_nxt;
|
||||
ti->tcpi_snd_mss = tp->t_segsz;
|
||||
ti->tcpi_rcv_mss = tp->t_segsz;
|
||||
#ifdef TF_TOE
|
||||
if (tp->t_flags & TF_TOE)
|
||||
ti->tcpi_options |= TCPI_OPT_TOE;
|
||||
#endif
|
||||
/* From the redundant department of redundancies... */
|
||||
ti->__tcpi_retransmits = ti->__tcpi_retrans =
|
||||
ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
|
||||
|
||||
ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
|
||||
ti->tcpi_snd_zerowin = tp->t_sndzerowin;
|
||||
}
|
||||
|
||||
int
|
||||
tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
|
||||
{
|
||||
|
@ -280,6 +340,7 @@ tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
|
|||
struct in6pcb *in6p;
|
||||
#endif
|
||||
struct tcpcb *tp;
|
||||
struct tcp_info ti;
|
||||
u_int ui;
|
||||
int family; /* family of the socket */
|
||||
int level, optname, optval;
|
||||
|
@ -450,6 +511,10 @@ tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
|
|||
optval = tp->t_peermss;
|
||||
error = sockopt_set(sopt, &optval, sizeof(optval));
|
||||
break;
|
||||
case TCP_INFO:
|
||||
tcp_fill_info(tp, &ti);
|
||||
error = sockopt_set(sopt, &ti, sizeof ti);
|
||||
break;
|
||||
#ifdef notyet
|
||||
case TCP_CONGCTL:
|
||||
break;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: tcp_var.h,v 1.175 2014/07/31 03:39:35 rtr Exp $ */
|
||||
/* $NetBSD: tcp_var.h,v 1.176 2015/02/14 12:57:53 he Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
||||
|
@ -364,6 +364,11 @@ struct tcpcb {
|
|||
u_int t_maxidle; /* t_keepcnt * t_keepintvl */
|
||||
|
||||
u_int t_msl; /* MSL to use for this connexion */
|
||||
|
||||
/* maintain a few stats per connection: */
|
||||
int t_rcvoopack; /* out-of-order packets received */
|
||||
int t_sndrexmitpack; /* retransmit packets sent */
|
||||
int t_sndzerowin; /* zero-window updates sent */
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in New Issue