Add code to remember where in the send queue of mbufs the last packet was

sent from. This change avoids a linear search through all mbufs when using
large TCP windows, and therefore permits high-speed connections over long
distances.

Tested on a 1 Gigabit connection between Luleå and San Francisco, a distance
of about 15000km.  With TCP windows of just over 20 Mbytes it could keep up
with 950Mbit/s.

After discussions with Matt Thomas and Jason Thorpe.
This commit is contained in:
ragge 2003-06-29 18:58:26 +00:00
parent d189d63466
commit 679db94879
4 changed files with 61 additions and 8 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_input.c,v 1.169 2003/06/15 02:49:33 matt Exp $ */
/* $NetBSD: tcp_input.c,v 1.170 2003/06/29 18:58:26 ragge Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -152,7 +152,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.169 2003/06/15 02:49:33 matt Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_input.c,v 1.170 2003/06/29 18:58:26 ragge Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -1457,6 +1457,9 @@ after_listen:
tcpstat.tcps_rcvackbyte += acked;
ND6_HINT(tp);
sbdrop(&so->so_snd, acked);
if (tp->t_lastm != NULL)
tp->t_lastoff -= acked;
/*
* We want snd_recover to track snd_una to
* avoid sequence wraparound problems for
@ -2069,11 +2072,15 @@ after_listen:
ND6_HINT(tp);
if (acked > so->so_snd.sb_cc) {
tp->snd_wnd -= so->so_snd.sb_cc;
tp->t_lastm = NULL;
sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
ourfinisacked = 1;
} else {
sbdrop(&so->so_snd, acked);
tp->snd_wnd -= acked;
tp->t_lastoff -= acked;
if (tp->t_lastoff <= 0)
tp->t_lastm = NULL;
ourfinisacked = 0;
}
sowwakeup(so);

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_output.c,v 1.91 2003/05/17 17:16:20 itojun Exp $ */
/* $NetBSD: tcp_output.c,v 1.92 2003/06/29 18:58:27 ragge Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -142,7 +142,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.91 2003/05/17 17:16:20 itojun Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_output.c,v 1.92 2003/06/29 18:58:27 ragge Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -446,7 +446,44 @@ tcp_build_datapkt(struct tcpcb *tp, struct socket *so, int off,
m->m_len += len;
TCP_OUTPUT_COUNTER_INCR(&tcp_output_copysmall);
} else {
m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
struct mbuf *m0;
/*
* To avoid traversing the whole sb_mb chain for correct
* data to send, remember last sent mbuf, its offset and
* the sent size. When called the next time, see if the
* data to send directly follows the previous
* transfer. This is important for large TCP windows.
*/
if (off > 8*1024) { /* Only for long chains */
if (tp->t_lastm == NULL ||
(tp->t_lastoff + tp->t_lastlen) != off) {
/* Prediction failed */
tp->t_lastm = so->so_snd.sb_mb;
tp->t_inoff = off;
} else {
tp->t_inoff += tp->t_lastlen;
tp->t_lastoff = off - tp->t_lastoff;
}
/* Traverse forward to next packet */
while (tp->t_inoff > 0) {
if (tp->t_lastm == NULL)
panic("tp->t_lastm == NULL");
if (tp->t_inoff < tp->t_lastm->m_len)
break;
tp->t_inoff -= tp->t_lastm->m_len;
tp->t_lastm = tp->t_lastm->m_next;
}
tp->t_lastoff = off;
tp->t_lastlen = len;
m0 = tp->t_lastm;
off = tp->t_inoff;
} else
m0 = so->so_snd.sb_mb;
m->m_next = m_copy(m0, off, (int) len);
if (m->m_next == NULL) {
m_freem(m);
return (ENOBUFS);

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_subr.c,v 1.140 2003/06/23 11:02:15 martin Exp $ */
/* $NetBSD: tcp_subr.c,v 1.141 2003/06/29 18:58:27 ragge Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -102,7 +102,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.140 2003/06/23 11:02:15 martin Exp $");
__KERNEL_RCSID(0, "$NetBSD: tcp_subr.c,v 1.141 2003/06/29 18:58:27 ragge Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@ -916,6 +916,9 @@ tcp_newtcpcb(family, aux)
tp->t_segsz = tcp_mssdflt;
LIST_INIT(&tp->t_sc);
tp->t_lastm = NULL;
tp->t_lastoff = 0;
callout_init(&tp->t_delack_ch);
for (i = 0; i < TCPT_NTIMERS; i++)
TCP_TIMER_INIT(tp, i);

View File

@ -1,4 +1,4 @@
/* $NetBSD: tcp_var.h,v 1.100 2003/06/28 14:22:09 darrenr Exp $ */
/* $NetBSD: tcp_var.h,v 1.101 2003/06/29 18:58:28 ragge Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@ -261,6 +261,12 @@ struct tcpcb {
/* pointer for syn cache entries*/
LIST_HEAD(, syn_cache) t_sc; /* list of entries by this tcb */
/* prediction of next mbuf when using large window sizes */
struct mbuf *t_lastm; /* last mbuf that data was sent from */
int t_inoff; /* data offset in previous mbuf */
int t_lastoff; /* last data address in mbuf chain */
int t_lastlen; /* last length read from mbuf chain */
};
#ifdef _KERNEL