Port over the TCP_INFO socket option from FreeBSD, originally from

the Linux 2.6 TCP API.  This permits the caller to query certain information
about a TCP connection, and is used by pkgsrc's net/iperf3 test program
if available.

This extends struct tcpcb with three fields to count retransmits,
out-of-sequence receives and zero window announcements, and will
therefore warrant a kernel revision bump (done separately).
This commit is contained in:
he 2015-02-14 12:57:52 +00:00
parent cba38714c3
commit 1d14d02249
7 changed files with 179 additions and 11 deletions

View File

@ -1,4 +1,4 @@
.\" $NetBSD: tcp.4,v 1.29 2013/10/10 12:28:10 christos Exp $
.\" $NetBSD: tcp.4,v 1.30 2015/02/14 12:57:52 he Exp $
.\" $FreeBSD: tcp.4,v 1.11.2.16 2004/02/16 22:21:47 bms Exp $
.\"
.\" Copyright (c) 1983, 1991, 1993
@ -243,6 +243,23 @@ option value is inherited from the listening socket.
This option takes an
.Vt "unsigned int"
value, with a value greater than 0.
.It Dv TCP_INFO
Information about a socket's underlying TCP session may be retrieved
by passing the read-only option
.Dv TCP_INFO
to
.Xr getsockopt 2 .
It accepts a single argument: a pointer to an instance of
.Vt "struct tcp_info" .
.Pp
This API is subject to change; consult the source to determine
which fields are currently filled out by this option.
.Nx
specific additions include
send window size,
receive window size,
and
bandwidth-controlled window space.
.\" range of 0 to N (where N is the
.\" .Xr sysctl 8
.\" variable

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp.h,v 1.30 2012/01/07 20:20:22 christos Exp $ */
/* $NetBSD: tcp.h,v 1.31 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@ -127,7 +127,80 @@ struct tcphdr {
#ifdef notyet
#define TCP_NOOPT 8 /* reserved for FreeBSD compat */
#endif
#define TCP_INFO 9 /* retrieve tcp_info structure (read-only, via getsockopt) */
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
#define TCP_CONGCTL 0x20 /* selected congestion control */
/*
 * Bit flags reported in the tcpi_options member of struct tcp_info.
 * The per-flag notes describe when tcp_fill_info() sets each bit.
 */
#define TCPI_OPT_TIMESTAMPS 0x01 /* timestamps both requested and received */
#define TCPI_OPT_SACK 0x02 /* SACK permitted on the connection */
#define TCPI_OPT_WSCALE 0x04 /* window scaling both requested and received */
#define TCPI_OPT_ECN 0x08 /* ECN permitted on the connection */
#define TCPI_OPT_TOE 0x10 /* TOE flag set; only tested where TF_TOE is defined */
/*
* The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits
* the caller to query certain information about the state of a TCP
* connection. We provide an overlapping set of fields with the Linux
* implementation, but since this is a fixed size structure, room has been
* left for growth. In order to maximize potential future compatibility with
* the Linux API, the same variable names and order have been adopted, and
* padding left to make room for omitted fields in case they are added later.
*
* XXX: This is currently an unstable ABI/API, in that it is expected to
* change.
*/
/*
 * Result of the TCP_INFO socket option; filled in by tcp_fill_info().
 * The structure is zeroed before being filled, so members that are not
 * assigned there read as 0.  Members named __tcpi_* appear to be
 * placeholders kept for layout compatibility with Linux; of those, only
 * __tcpi_retransmits and __tcpi_retrans are assigned by tcp_fill_info().
 * Member order and widths are part of the ABI: do not reorder.
 */
struct tcp_info {
uint8_t tcpi_state; /* TCP FSM state (kernel's t_state value). */
uint8_t __tcpi_ca_state;
uint8_t __tcpi_retransmits; /* Retransmit count; only 8 bits wide. */
uint8_t __tcpi_probes;
uint8_t __tcpi_backoff;
uint8_t tcpi_options; /* Options enabled on conn. (TCPI_OPT_* bits). */
uint8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */
tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */
uint32_t tcpi_rto; /* Retransmission timeout (usec). */
uint32_t __tcpi_ato;
uint32_t tcpi_snd_mss; /* Max segment size for send. */
uint32_t tcpi_rcv_mss; /* Max segment size for receive. */
uint32_t __tcpi_unacked;
uint32_t __tcpi_sacked;
uint32_t __tcpi_lost;
uint32_t __tcpi_retrans; /* Retransmit count (same source as tcpi_snd_rexmitpack). */
uint32_t __tcpi_fackets;
/* Times; measurements in usecs. */
uint32_t __tcpi_last_data_sent;
uint32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */
uint32_t tcpi_last_data_recv; /* Time since last recv data. */
uint32_t __tcpi_last_ack_recv;
/* Metrics; variable units. */
uint32_t __tcpi_pmtu;
uint32_t __tcpi_rcv_ssthresh;
uint32_t tcpi_rtt; /* Smoothed RTT in usecs. */
uint32_t tcpi_rttvar; /* RTT variance in usecs. */
uint32_t tcpi_snd_ssthresh; /* Slow start threshold. */
uint32_t tcpi_snd_cwnd; /* Send congestion window, reported in segments. */
uint32_t __tcpi_advmss;
uint32_t __tcpi_reordering;
uint32_t __tcpi_rcv_rtt;
uint32_t tcpi_rcv_space; /* Advertised recv window. */
/* FreeBSD/NetBSD extensions to tcp_info. */
uint32_t tcpi_snd_wnd; /* Advertised send window, reported in segments. */
uint32_t tcpi_snd_bwnd; /* No longer used; always reported as 0. */
uint32_t tcpi_snd_nxt; /* Next egress seqno */
uint32_t tcpi_rcv_nxt; /* Next ingress seqno */
uint32_t tcpi_toe_tid; /* HWTID for TOE endpoints */
uint32_t tcpi_snd_rexmitpack; /* Retransmitted packets */
uint32_t tcpi_rcv_ooopack; /* Out-of-order packets */
uint32_t tcpi_snd_zerowin; /* Zero-sized windows sent */
/* Padding to grow without breaking ABI. */
uint32_t __tcpi_pad[26]; /* Padding. */
};
#endif /* !_NETINET_TCP_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $ */
/* $NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -148,7 +148,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.335 2014/12/02 20:25:47 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.336 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -738,6 +738,7 @@ tcp_reass(struct tcpcb *tp, const struct tcphdr *th, struct mbuf *m, int *tlen)
/*
* Update the counters.
*/
tp->t_rcvoopack++;
tcps = TCP_STAT_GETREF();
tcps[TCP_STAT_RCVOOPACK]++;
tcps[TCP_STAT_RCVOOBYTE] += rcvoobyte;

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $ */
/* $NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -135,7 +135,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.179 2014/11/10 18:52:51 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.180 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -439,6 +439,7 @@ tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off,
if (tp->t_force && len == 1)
tcps[TCP_STAT_SNDPROBE]++;
else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
tp->t_sndrexmitpack++;
tcps[TCP_STAT_SNDREXMITPACK]++;
tcps[TCP_STAT_SNDREXMITBYTE] += len;
} else {
@ -1401,6 +1402,9 @@ send:
if (win < (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt))
win = (long)(int32_t)(tp->rcv_adv - tp->rcv_nxt);
th->th_win = htons((u_int16_t) (win>>tp->rcv_scale));
if (th->th_win == 0) {
tp->t_sndzerowin++;
}
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
u_int32_t urp = tp->snd_up - tp->snd_nxt;
if (urp > IP_MAXPACKET)

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $ */
/* $NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.257 2014/11/10 18:52:51 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.258 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -980,6 +980,9 @@ static struct tcpcb tcpcb_template = {
.t_partialacks = -1,
.t_bytes_acked = 0,
.t_sndrexmitpack = 0,
.t_rcvoopack = 0,
.t_sndzerowin = 0,
};
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $ */
/* $NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -99,7 +99,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.203 2015/02/14 12:57:53 he Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -119,6 +119,7 @@ __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.202 2014/11/10 18:52:51 maxv Exp $"
#include <sys/domain.h>
#include <sys/sysctl.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/uidinfo.h>
#include <net/if.h>
@ -271,6 +272,65 @@ change_keepalive(struct socket *so, struct tcpcb *tp)
TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
}
/*
 * Export TCP internal state information via a struct tcp_info, based on the
 * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
 * (TCP state machine, etc).  Values are exported using the kernel's own
 * constants -- for example, the numeric values for tcpi_state will differ
 * from Linux.
 *
 *	tp	connection whose state is reported; it is only read here.
 *	ti	output structure; zeroed first, so every member that is
 *		not assigned below reads as 0.
 */
static void
tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
{
	uint32_t rexmits;

	bzero(ti, sizeof(*ti));

	ti->tcpi_state = tp->t_state;

	/* Timestamps count as enabled only if both requested and received. */
	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
	if (tp->t_flags & TF_SACK_PERMIT)
		ti->tcpi_options |= TCPI_OPT_SACK;
	/* Shift values are exported only when scaling is in effect. */
	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
		ti->tcpi_options |= TCPI_OPT_WSCALE;
		ti->tcpi_snd_wscale = tp->snd_scale;
		ti->tcpi_rcv_wscale = tp->rcv_scale;
	}
	if (tp->t_flags & TF_ECN_PERMIT) {
		ti->tcpi_options |= TCPI_OPT_ECN;
	}
#ifdef TF_TOE
	if (tp->t_flags & TF_TOE)
		ti->tcpi_options |= TCPI_OPT_TOE;
#endif

	/* Timer-derived values are scaled by the kernel's `tick'. */
	ti->tcpi_rto = tp->t_rxtcur * tick;
	ti->tcpi_last_data_recv = (long)(hardclock_ticks -
	    (int)tp->t_rcvtime) * tick;
	ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
	ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;

	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;

	/*
	 * Linux API wants these in # of segments, apparently.  If no
	 * segment size is set, leave them at 0 (from the bzero above)
	 * rather than dividing by zero.
	 */
	if (tp->t_segsz != 0) {
		ti->tcpi_snd_cwnd = tp->snd_cwnd / tp->t_segsz;
		ti->tcpi_snd_wnd = tp->snd_wnd / tp->t_segsz;
	}

	ti->tcpi_rcv_space = tp->rcv_wnd;
	ti->tcpi_snd_mss = tp->t_segsz;
	ti->tcpi_rcv_mss = tp->t_segsz;

	/*
	 * Extension fields beyond the Linux layout.
	 */
	ti->tcpi_rcv_nxt = tp->rcv_nxt;
	ti->tcpi_snd_bwnd = 0;		/* Unused, kept for compat. */
	ti->tcpi_snd_nxt = tp->snd_nxt;

	/*
	 * From the redundant department of redundancies: the same
	 * retransmit count is reported in three members.
	 * __tcpi_retransmits is only 8 bits wide, so clamp it at 0xff
	 * instead of letting it wrap back to 0 every 256 retransmits.
	 */
	rexmits = tp->t_sndrexmitpack;
	ti->tcpi_snd_rexmitpack = rexmits;
	ti->__tcpi_retrans = rexmits;
	ti->__tcpi_retransmits = (rexmits > 0xff) ? 0xff : rexmits;

	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
}
int
tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
{
@ -280,6 +340,7 @@ tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
struct in6pcb *in6p;
#endif
struct tcpcb *tp;
struct tcp_info ti;
u_int ui;
int family; /* family of the socket */
int level, optname, optval;
@ -450,6 +511,10 @@ tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
optval = tp->t_peermss;
error = sockopt_set(sopt, &optval, sizeof(optval));
break;
case TCP_INFO:
tcp_fill_info(tp, &ti);
error = sockopt_set(sopt, &ti, sizeof ti);
break;
#ifdef notyet
case TCP_CONGCTL:
break;

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_var.h,v 1.175 2014/07/31 03:39:35 rtr Exp $ */
/* $NetBSD: tcp_var.h,v 1.176 2015/02/14 12:57:53 he Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -364,6 +364,11 @@ struct tcpcb {
u_int t_maxidle; /* t_keepcnt * t_keepintvl */
u_int t_msl; /* MSL to use for this connexion */
/* maintain a few stats per connection: */
int t_rcvoopack; /* out-of-order packets received */
int t_sndrexmitpack; /* retransmit packets sent */
int t_sndzerowin; /* zero-window updates sent */
};
/*