NetBSD/sys/netiso/tp_subr.c

/*	$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $	*/

/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tp_subr.c	8.1 (Berkeley) 6/10/93
 */

/***********************************************************
		Copyright IBM Corporation 1987

                      All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of IBM not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.

IBM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
SOFTWARE.

******************************************************************/

/*
 * ARGO Project, Computer Sciences Dept., University of Wisconsin - Madison
 */
/*
 * The main work of data transfer is done here. These routines are called
 * from tp.trans. They include the routines that check the validity of acks
 * and Xacks, (tp_goodack() and tp_goodXack() ) take packets from socket
 * buffers and send them (tp_send()), drop the data from the socket buffers
 * (tp_sbdrop()),  and put incoming packet data into socket buffers
 * (tp_stash()).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tp_subr.c,v 1.21 2007/03/04 06:03:33 christos Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>

#include <netiso/tp_ip.h>
#include <netiso/iso.h>
#include <netiso/argo_debug.h>
#include <netiso/tp_timer.h>
#include <netiso/tp_param.h>
#include <netiso/tp_stat.h>
#include <netiso/tp_pcb.h>
#include <netiso/tp_tpdu.h>
#include <netiso/tp_trace.h>
#include <netiso/tp_meas.h>
#include <netiso/tp_seq.h>
#include <netiso/tp_var.h>

int             tprexmtthresh = 3;

/*
 * CALLED FROM:
 *	tp.trans, when an XAK arrives
 * FUNCTION and ARGUMENTS:
 * 	Determines if the sequence number (seq) from the XAK
 * 	acks anything new.  If so, drop the appropriate tpdu
 * 	from the XPD send queue.
 * RETURN VALUE:
 * 	Returns 1 if it did this, 0 if the ack caused no action.
 */
int
tp_goodXack(struct tp_pcb  *tpcb, SeqNum seq)
{

#ifdef TPPT
	if (tp_traceflags[D_XPD]) {
		tptraceTPCB(TPPTgotXack,
		      seq, tpcb->tp_Xuna, tpcb->tp_Xsndnxt, tpcb->tp_sndnew,
			    tpcb->tp_snduna);
	}
#endif

	if (seq == tpcb->tp_Xuna) {
		tpcb->tp_Xuna = tpcb->tp_Xsndnxt;

		/*
		 * DROP 1 packet from the Xsnd socket buf - just so happens
		 * that only one packet can be there at any time so drop the
		 * whole thing.  If you allow > 1 packet the socket buffer,
		 * then you'll have to keep track of how many characters went
		 * w/ each XPD tpdu, so this will get messier
		 */
#ifdef ARGO_DEBUG
		if (argo_debug[D_XPD]) {
			dump_mbuf(tpcb->tp_Xsnd.sb_mb,
				  "tp_goodXack Xsnd before sbdrop");
		}
#endif

#ifdef TPPT
		if (tp_traceflags[D_XPD]) {
			tptraceTPCB(TPPTmisc,
				    "goodXack: dropping cc ",
				    (int) (tpcb->tp_Xsnd.sb_cc),
				    0, 0, 0);
		}
#endif
		sbdroprecord(&tpcb->tp_Xsnd);
		return 1;
	}
	return 0;
}

/*
 * CALLED FROM:
 *  tp_good_ack()
 * FUNCTION and ARGUMENTS:
 *  updates
 *  smoothed average round trip time (*rtt)
 *  roundtrip time variance (*rtv) - actually deviation, not variance
 *  given the new value (diff)
 * RETURN VALUE:
 * void
 */

void
tp_rtt_rtv(struct tp_pcb *tpcb)
{
	int             old = tpcb->tp_rtt;
	int             elapsed, delta = 0;

	elapsed = hardclock_ticks - tpcb->tp_rttemit;

	if (tpcb->tp_rtt != 0) {
		/*
		 * rtt is the smoothed round trip time in machine clock
		 * ticks (hz). It is stored as a fixed point number,
		 * unscaled (unlike the tcp srtt).  The rationale here
		 * is that it is only significant to the nearest unit of
		 * slowtimo, which is at least 8 machine clock ticks
		 * so there is no need to scale.  The smoothing is done
		 * according to the same formula as TCP (rtt = rtt*7/8
		 * + measured_rtt/8).
		 */
		delta = elapsed - tpcb->tp_rtt;
		if ((tpcb->tp_rtt += (delta >> TP_RTT_ALPHA)) <= 0)
			tpcb->tp_rtt = 1;
		/*
		 * rtv is a smoothed accumulated mean difference, unscaled
		 * for reasons expressed above.
		 * It is smoothed with an alpha of .75, and the round trip timer
		 * will be set to rtt + 4*rtv, also as TCP does.
		 */
		if (delta < 0)
			delta = -delta;
		if ((tpcb->tp_rtv += ((delta - tpcb->tp_rtv) >> TP_RTV_ALPHA)) <= 0)
			tpcb->tp_rtv = 1;
	} else {
		/*
		 * No rtt measurement yet - use the unsmoothed rtt. Set the
		 * variance to half the rtt (so our first retransmit happens
		 * at 3*rtt)
		 */
		tpcb->tp_rtt = elapsed;
		tpcb->tp_rtv = elapsed >> 1;
	}
	tpcb->tp_rttemit = 0;
	tpcb->tp_rxtshift = 0;
	/*
	 * Quoting TCP: "the retransmit should happen at rtt + 4 * rttvar.
	 * Because of the way we do the smoothing, srtt and rttvar
	 * will each average +1/2 tick of bias.  When we compute
	 * the retransmit timer, we want 1/2 tick of rounding and
	 * 1 extra tick because of +-1/2 tick uncertainty in the
	 * firing of the timer.  The bias will give us exactly the
	 * 1.5 tick we need.  But, because the bias is
	 * statistical, we have to test that we don't drop below
	 * the minimum feasible timer (which is 2 ticks)."
	 */
	TP_RANGESET(tpcb->tp_dt_ticks, TP_REXMTVAL(tpcb),
		    tpcb->tp_peer_acktime, 128 /* XXX */ );
#ifdef ARGO_DEBUG
	if (argo_debug[D_RTT]) {
		printf("%s tpcb %p, elapsed %d, delta %d, rtt %d, rtv %d, old %d\n",
		       "tp_rtt_rtv:", tpcb, elapsed, delta, tpcb->tp_rtt, tpcb->tp_rtv, old);
	}
#endif
	tpcb->tp_rxtcur = tpcb->tp_dt_ticks;
}

/*
 * CALLED FROM:
 *  tp.trans when an AK arrives
 * FUNCTION and ARGUMENTS:
 * 	Given (cdt), the credit from the AK tpdu, and
 *	(seq), the sequence number from the AK tpdu,
 *  tp_goodack() determines if the AK acknowledges something in the send
 * 	window, and if so, drops the appropriate packets from the retransmission
 *  list, computes the round trip time, and updates the retransmission timer
 *  based on the new smoothed round trip time.
 * RETURN VALUE:
 * 	Returns 1 if
 * 	EITHER it actually acked something heretofore unacknowledged
 * 	OR no news but the credit should be processed.
 * 	If something heretofore unacked was acked with this sequence number,
 * 	the appropriate tpdus are dropped from the retransmission control list,
 * 	by calling tp_sbdrop().
 * 	No need to see the tpdu itself.
 */
int
tp_goodack(struct tp_pcb *tpcb, u_int cdt, SeqNum seq, u_int subseq)
{
	int             old_fcredit = 0;
	int             bang = 0;	/* bang --> ack for something
					 * heretofore unacked */
	u_int           bytes_acked;

#ifdef ARGO_DEBUG
	if (argo_debug[D_ACKRECV]) {
		printf("goodack tpcb %p seq 0x%x cdt %d una 0x%x new 0x%x nxt 0x%x\n",
		       tpcb, seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, tpcb->tp_sndnxt);
	}
#endif

#ifdef TPPT
	if (tp_traceflags[D_ACKRECV]) {
		tptraceTPCB(TPPTgotack,
			seq, cdt, tpcb->tp_snduna, tpcb->tp_sndnew, subseq);
	}
#endif

#ifdef TP_PERF_MEAS
		if (DOPERF(tpcb)) {
		tpmeas(tpcb->tp_lref, TPtime_ack_rcvd, (struct timeval *) 0, seq, 0, 0);
	}
#endif

	if (seq == tpcb->tp_snduna) {
		if (subseq < tpcb->tp_r_subseq ||
		 (subseq == tpcb->tp_r_subseq && cdt <= tpcb->tp_fcredit)) {
	discard_the_ack:
#ifdef ARGO_DEBUG
			if (argo_debug[D_ACKRECV]) {
				printf("goodack discard : tpcb %p subseq %d r_subseq %d\n",
				       tpcb, subseq, tpcb->tp_r_subseq);
			}
#endif
			goto done;
		}
		if (cdt == tpcb->tp_fcredit	/* && thus subseq >
		        tpcb->tp_r_subseq */ ) {
			tpcb->tp_r_subseq = subseq;
			if (tpcb->tp_timer[TM_data_retrans] == 0)
				tpcb->tp_dupacks = 0;
			else if (++tpcb->tp_dupacks == tprexmtthresh) {
				/*
				 * partner went out of his way to signal with
				 * different subsequences that he has the
				 * same lack of an expected packet.  This may
				 * be an early indiciation of a loss
				 */

				SeqNum          onxt = tpcb->tp_sndnxt;
				struct mbuf    *onxt_m = tpcb->tp_sndnxt_m;
				u_int           win = min(tpcb->tp_fcredit,
				tpcb->tp_cong_win / tpcb->tp_l_tpdusize) / 2;
#ifdef ARGO_DEBUG
				if (argo_debug[D_ACKRECV]) {
					printf("%s tpcb %p seq 0x%x rttseq 0x%x onxt 0x%x\n",
					       "goodack dupacks:", tpcb, seq, tpcb->tp_rttseq, onxt);
				}
#endif
				if (win < 2)
					win = 2;
				tpcb->tp_ssthresh = win * tpcb->tp_l_tpdusize;
				tpcb->tp_timer[TM_data_retrans] = 0;
				tpcb->tp_rttemit = 0;
				tpcb->tp_sndnxt = tpcb->tp_snduna;
				tpcb->tp_sndnxt_m = 0;
				tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
				tp_send(tpcb);
				tpcb->tp_cong_win = tpcb->tp_ssthresh +
					tpcb->tp_dupacks * tpcb->tp_l_tpdusize;
				if (SEQ_GT(tpcb, onxt, tpcb->tp_sndnxt)) {
					tpcb->tp_sndnxt = onxt;
					tpcb->tp_sndnxt_m = onxt_m;
				}
			} else if (tpcb->tp_dupacks > tprexmtthresh) {
				tpcb->tp_cong_win += tpcb->tp_l_tpdusize;
			}
			goto done;
		}
	} else if (SEQ_LT(tpcb, seq, tpcb->tp_snduna))
		goto discard_the_ack;
	/*
	 * If the congestion window was inflated to account
	 * for the other side's cached packets, retract it.
	 */
	if (tpcb->tp_dupacks > tprexmtthresh &&
	    tpcb->tp_cong_win > tpcb->tp_ssthresh)
		tpcb->tp_cong_win = tpcb->tp_ssthresh;
	tpcb->tp_r_subseq = subseq;
	old_fcredit = tpcb->tp_fcredit;
	tpcb->tp_fcredit = cdt;
	if (cdt > tpcb->tp_maxfcredit)
		tpcb->tp_maxfcredit = cdt;
	tpcb->tp_dupacks = 0;

	if (IN_SWINDOW(tpcb, seq, tpcb->tp_snduna, tpcb->tp_sndnew)) {

		tpsbcheck(tpcb, 0);
		bytes_acked = tp_sbdrop(tpcb, seq);
		tpsbcheck(tpcb, 1);
		/*
		 * If transmit timer is running and timed sequence
		 * number was acked, update smoothed round trip time.
		 * Since we now have an rtt measurement, cancel the
		 * timer backoff (cf., Phil Karn's retransmit alg.).
		 * Recompute the initial retransmit timer.
		 */
		if (tpcb->tp_rttemit && SEQ_GT(tpcb, seq, tpcb->tp_rttseq))
			tp_rtt_rtv(tpcb);
		/*
		 * If all outstanding data is acked, stop retransmit timer.
		 * If there is more data to be acked, restart retransmit
		 * timer, using current (possibly backed-off) value.
		 * OSI combines the keepalive and persistance functions.
		 * So, there is no persistance timer per se, to restart.
		 */
		if (tpcb->tp_class != TP_CLASS_0)
			tpcb->tp_timer[TM_data_retrans] =
				(seq == tpcb->tp_sndnew) ? 0 : tpcb->tp_rxtcur;
		/*
		 * When new data is acked, open the congestion window.
		 * If the window gives us less than ssthresh packets
		 * in flight, open exponentially (maxseg per packet).
		 * Otherwise open linearly: maxseg per window
		 * (maxseg^2 / cwnd per packet), plus a constant
		 * fraction of a packet (maxseg/8) to help larger windows
		 * open quickly enough.
		 */
		{
			u_int           cw = tpcb->tp_cong_win, incr = tpcb->tp_l_tpdusize;

			incr = min(incr, bytes_acked);
			if (cw > tpcb->tp_ssthresh)
				incr = incr * incr / cw + incr / 8;
			tpcb->tp_cong_win =
				min(cw + incr, tpcb->tp_sock->so_snd.sb_hiwat);
		}
		tpcb->tp_snduna = seq;
		if (SEQ_LT(tpcb, tpcb->tp_sndnxt, seq)) {
			tpcb->tp_sndnxt = seq;
			tpcb->tp_sndnxt_m = 0;
		}
		bang++;
	}
	if (cdt != 0 && old_fcredit == 0) {
		tpcb->tp_sendfcc = 1;
	}
	if (cdt == 0) {
		if (old_fcredit != 0)
			IncStat(ts_zfcdt);
		/* The following might mean that the window shrunk */
		if (tpcb->tp_timer[TM_data_retrans]) {
			tpcb->tp_timer[TM_data_retrans] = 0;
			tpcb->tp_timer[TM_sendack] = tpcb->tp_dt_ticks;
			if (tpcb->tp_sndnxt != tpcb->tp_snduna) {
				tpcb->tp_sndnxt = tpcb->tp_snduna;
				tpcb->tp_sndnxt_m = 0;
			}
		}
	}
	tpcb->tp_fcredit = cdt;
	bang |= (old_fcredit < cdt);

done:
#ifdef ARGO_DEBUG
	if (argo_debug[D_ACKRECV]) {
		printf("goodack returns 0x%x, cdt 0x%x ocdt 0x%x cwin 0x%lx\n",
		       bang, cdt, old_fcredit, tpcb->tp_cong_win);
	}
#endif
	/*
	 * if (bang) XXXXX Very bad to remove this test, but somethings
	 * broken
	 */
	tp_send(tpcb);
	return (bang);
}

/*
 * CALLED FROM:
 *  tp_goodack()
 * FUNCTION and ARGUMENTS:
 *  drops everything up TO but not INCLUDING seq # (seq)
 *  from the retransmission queue.
 */
int
tp_sbdrop(struct tp_pcb *tpcb, SeqNum seq)
{
	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
	int    i = SEQ_SUB(tpcb, seq, tpcb->tp_snduna);
	int             oldcc = sb->sb_cc, oldi = i;

	if (i >= tpcb->tp_seqhalf)
		printf("tp_spdropping too much -- should panic");
	while (i-- > 0)
		sbdroprecord(sb);
#ifdef ARGO_DEBUG
	if (argo_debug[D_ACKRECV]) {
		printf("tp_sbdroping %d pkts %ld bytes on %p at 0x%x\n",
		       oldi, oldcc - sb->sb_cc, tpcb, seq);
	}
#endif
	if (sb_notify(sb))
		sowwakeup(tpcb->tp_sock);
	return (oldcc - sb->sb_cc);
}

/*
 * CALLED FROM:
 * 	tp.trans on user send request, arrival of AK and arrival of XAK
 * FUNCTION and ARGUMENTS:
 * 	Emits tpdus starting at sequence number (tpcb->tp_sndnxt).
 * 	Emits until a) runs out of data, or  b) runs into an XPD mark, or
 * 			c) it hits seq number (highseq) limited by cong or credit.
 *
 * 	If you want XPD to buffer > 1 du per socket buffer, you can
 * 	modifiy this to issue XPD tpdus also, but then it'll have
 * 	to take some argument(s) to distinguish between the type of DU to
 * 	hand tp_emit.
 *
 * 	When something is sent for the first time, its time-of-send
 * 	is stashed (in system clock ticks rather than pf_slowtimo ticks).
 *  When the ack arrives, the smoothed round-trip time is figured
 *  using this value.
 */
void
tp_send(struct tp_pcb *tpcb)
{
	int    len;
	struct mbuf *m;
	struct mbuf    *mb = 0;
	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
	unsigned int    eotsdu = 0;
	SeqNum          highseq, checkseq;
	int             idle, idleticks, off, cong_win;
#ifdef TP_PERF_MEAS
	int             send_start_time = hardclock_ticks;
	SeqNum          oldnxt = tpcb->tp_sndnxt;
#endif /* TP_PERF_MEAS */

	idle = (tpcb->tp_snduna == tpcb->tp_sndnew);
	if (idle) {
		idleticks = tpcb->tp_inact_ticks - tpcb->tp_timer[TM_inact];
		if (idleticks > tpcb->tp_dt_ticks)
			/*
			 * We have been idle for "a while" and no acks are
			 * expected to clock out any data we send --
			 * slow start to get ack "clock" running again.
			 */
			tpcb->tp_cong_win = tpcb->tp_l_tpdusize;
	}
	cong_win = tpcb->tp_cong_win;
	highseq = SEQ(tpcb, tpcb->tp_fcredit + tpcb->tp_snduna);
	if (tpcb->tp_Xsnd.sb_mb)
		highseq = SEQ_MIN(tpcb, highseq, tpcb->tp_sndnew);

#ifdef ARGO_DEBUG
	if (argo_debug[D_DATA]) {
		printf("tp_send enter tpcb %p nxt 0x%x win %d high 0x%x\n",
		       tpcb, tpcb->tp_sndnxt, cong_win, highseq);
	}
#endif
#ifdef TPPT
	if (tp_traceflags[D_DATA]) {
		tptraceTPCB(TPPTmisc, "tp_send sndnew snduna",
			    tpcb->tp_sndnew, tpcb->tp_snduna, 0, 0);
	tptraceTPCB(TPPTmisc, "tp_send tpcb->tp_sndnxt win fcredit congwin",
	    tpcb->tp_sndnxt, cong_win, tpcb->tp_fcredit, tpcb->tp_cong_win);
	}
#endif
#ifdef TPPT
		if (tp_traceflags[D_DATA]) {
		tptraceTPCB(TPPTmisc, "tp_send 2 nxt high fcredit congwin",
		      tpcb->tp_sndnxt, highseq, tpcb->tp_fcredit, cong_win);
	}
#endif

		if (tpcb->tp_sndnxt_m)
		m = tpcb->tp_sndnxt_m;
	else {
		off = SEQ_SUB(tpcb, tpcb->tp_sndnxt, tpcb->tp_snduna);
		for (m = sb->sb_mb; m && off > 0; m = m->m_next)
			off--;
	}
	/*
	 * Avoid silly window syndrome here . . . figure out how!
	 */
	checkseq = tpcb->tp_sndnum;
	if (idle && SEQ_LT(tpcb, tpcb->tp_sndnum, highseq))
		checkseq = highseq;	/* i.e. DON'T retain highest assigned
					 * packet */

	while ((SEQ_LT(tpcb, tpcb->tp_sndnxt, highseq)) && m && cong_win > 0) {

		eotsdu = (m->m_flags & M_EOR) != 0;
		len = m->m_pkthdr.len;
		if (tpcb->tp_sndnxt == checkseq && eotsdu == 0 &&
		    len < (tpcb->tp_l_tpdusize / 2))
			break;	/* Nagle . . . . . */
		cong_win -= len;
		/*
		 * make a copy - mb goes into the retransmission list while m
		 * gets emitted.  m_copy won't copy a zero-length mbuf.
		 */
		mb = m;
		m = m_copy(mb, 0, M_COPYALL);
		if (m == NULL)
			break;
#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc,
				    "tp_send mcopy nxt high eotsdu len",
				    tpcb->tp_sndnxt, highseq, eotsdu, len);
		}
#endif

#ifdef ARGO_DEBUG
			if (argo_debug[D_DATA]) {
			printf("tp_sending tpcb %p nxt 0x%x\n",
			       tpcb, tpcb->tp_sndnxt);
		}
#endif
		/*
		 * when headers are precomputed, may need to fill in checksum
		 * here
		 */
		tpcb->tp_sock->so_error =
		      tp_emit(DT_TPDU_type, tpcb, tpcb->tp_sndnxt, eotsdu, m);
		if (tpcb->tp_sock->so_error != 0)
			/* error */
			break;
		m = mb->m_nextpkt;
		tpcb->tp_sndnxt_m = m;
		if (tpcb->tp_sndnxt == tpcb->tp_sndnew) {
			SEQ_INC(tpcb, tpcb->tp_sndnew);
			/*
			 * Time this transmission if not a retransmission and
			 * not currently timing anything.
			 */
			if (tpcb->tp_rttemit == 0) {
				tpcb->tp_rttemit = hardclock_ticks;
				tpcb->tp_rttseq = tpcb->tp_sndnxt;
			}
			tpcb->tp_sndnxt = tpcb->tp_sndnew;
		} else
			SEQ_INC(tpcb, tpcb->tp_sndnxt);
		/*
		 * Set retransmit timer if not currently set.
		 * Initial value for retransmit timer is smoothed
		 * round-trip time + 2 * round-trip time variance.
		 * Initialize shift counter which is used for backoff
		 * of retransmit time.
		 */
		if (tpcb->tp_timer[TM_data_retrans] == 0 &&
		    tpcb->tp_class != TP_CLASS_0) {
			tpcb->tp_timer[TM_data_retrans] = tpcb->tp_dt_ticks;
			tpcb->tp_timer[TM_sendack] = tpcb->tp_keepalive_ticks;
			tpcb->tp_rxtshift = 0;
		}
	}
	if (SEQ_GT(tpcb, tpcb->tp_sndnew, tpcb->tp_sndnum))
		tpcb->tp_oktonagle = 0;
#ifdef TP_PERF_MEAS
	if (DOPERF(tpcb)) {
		int    npkts;
		int             s, elapsed, *t;
		struct timeval  now;

		elapsed = hardclock_ticks - send_start_time;

		npkts = SEQ_SUB(tpcb, tpcb->tp_sndnxt, oldnxt);

		if (npkts > 0)
			tpcb->tp_Nwindow++;

		if (npkts > TP_PM_MAX)
			npkts = TP_PM_MAX;

		t = &(tpcb->tp_p_meas->tps_sendtime[npkts]);
		*t += (t - elapsed) >> TP_RTT_ALPHA;

		if (mb == 0) {
			IncPStat(tpcb, tps_win_lim_by_data[npkts]);
		} else {
			IncPStat(tpcb, tps_win_lim_by_cdt[npkts]);
			/* not true with congestion-window being used */
		}
		now.tv_sec = elapsed / hz;
		now.tv_usec = (elapsed - (hz * now.tv_sec)) * 1000000 / hz;
		tpmeas(tpcb->tp_lref,
		       TPsbsend, &elapsed, newseq, tpcb->tp_Nwindow, npkts);
	}
#endif				/* TP_PERF_MEAS */


#ifdef TPPT
	if (tp_traceflags[D_DATA]) {
		tptraceTPCB(TPPTmisc,
			    "tp_send at end: new nxt eotsdu error",
			    tpcb->tp_sndnew, tpcb->tp_sndnxt, eotsdu,
			    tpcb->tp_sock->so_error);

	}
#endif
}

int             TPNagleok;
int             TPNagled;

int
tp_packetize(struct tp_pcb *tpcb, struct mbuf *m, int eotsdu)
{
	struct mbuf *n = NULL;
	struct sockbuf *sb = &tpcb->tp_sock->so_snd;
	int             maxsize = tpcb->tp_l_tpdusize
			    - tp_headersize(DT_TPDU_type, tpcb)
			    - (tpcb->tp_use_checksum ? 4 : 0);
	int             totlen = m->m_pkthdr.len;

	/*
	 * Pre-packetize the data in the sockbuf
	 * according to negotiated mtu.  Do it here
	 * where we can safely wait for mbufs.
	 *
	 * This presumes knowledge of sockbuf conventions.
	 * TODO: allocate space for header and fill it in (once!).
	 */
#ifdef ARGO_DEBUG
	if (argo_debug[D_DATA]) {
		printf("SEND BF: maxsize %d totlen %d eotsdu %d sndnum 0x%x\n",
		       maxsize, totlen, eotsdu, tpcb->tp_sndnum);
	}
#endif
	if (tpcb->tp_oktonagle) {
		if ((n = sb->sb_mb) == 0)
			panic("tp_packetize");
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		if (n->m_flags & M_EOR)
			panic("tp_packetize 2");
		SEQ_INC(tpcb, tpcb->tp_sndnum);
		if (totlen + n->m_pkthdr.len < maxsize) {
			/*
			 * There is an unsent packet with space,
			 * combine data
			 */
			struct mbuf    *old_n = n;
			tpsbcheck(tpcb, 3);
			n->m_pkthdr.len += totlen;
			while (n->m_next)
				n = n->m_next;
			sbcompress(sb, m, n);
			tpsbcheck(tpcb, 4);
			n = old_n;
			TPNagled++;
			goto out;
		}
	}

	while (m) {
		n = m;
		if (totlen > maxsize) {
			if ((m = m_split(n, maxsize, M_WAIT)) == 0)
				panic("tp_packetize");
		} else
			m = 0;
		totlen -= maxsize;
		tpsbcheck(tpcb, 5);
		sbappendrecord(sb, n);
		tpsbcheck(tpcb, 6);
		SEQ_INC(tpcb, tpcb->tp_sndnum);
	}
out:
	if (eotsdu) {
		n->m_flags |= M_EOR;	/* XXX belongs at end */
		tpcb->tp_oktonagle = 0;
	} else {
		SEQ_DEC(tpcb, tpcb->tp_sndnum);
		tpcb->tp_oktonagle = 1;
		TPNagleok++;
	}

#ifdef ARGO_DEBUG
	if (argo_debug[D_DATA]) {
		printf("SEND out: oktonagle %d sndnum 0x%x\n",
		       tpcb->tp_oktonagle, tpcb->tp_sndnum);
	}
#endif
	return 0;
}


/*
 * NAME: tp_stash()
 * CALLED FROM:
 *	tp.trans on arrival of a DT tpdu
 * FUNCTION, ARGUMENTS, and RETURN VALUE:
 * 	Returns 1 if
 *	a) something new arrived and it's got eotsdu_reached bit on,
 * 	b) this arrival was caused other out-of-sequence things to be
 *    	accepted, or
 * 	c) this arrival is the highest seq # for which we last gave credit
 *   	(sender just sent a whole window)
 *  In other words, returns 1 if tp should send an ack immediately, 0 if
 *  the ack can wait a while.
 *
 * Note: this implementation no longer renegs on credit, (except
 * when debugging option D_RENEG is on, for the purpose of testing
 * ack subsequencing), so we don't  need to check for incoming tpdus
 * being in a reneged portion of the window.
 */

int
tp_stash(struct tp_pcb *tpcb, struct tp_event *e)
{
	int    ack_reason = tpcb->tp_ack_strat & ACK_STRAT_EACH;
	/* 0--> delay acks until full window */
	/* 1--> ack each tpdu */
#define E e->TPDU_ATTR(DT)

	if (E.e_eot) {
		struct mbuf *n = E.e_data;
		n->m_flags |= M_EOR;
		n->m_nextpkt = 0;
	}
#ifdef ARGO_DEBUG
	if (argo_debug[D_STASH]) {
		dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
			  "stash: so_rcv before appending");
		dump_mbuf(E.e_data,
			  "stash: e_data before appending");
	}
#endif

#ifdef TP_PERF_MEAS
	if (DOPERF(tpcb)) {
		PStat(tpcb, Nb_from_ll) += E.e_datalen;
		tpmeas(tpcb->tp_lref, TPtime_from_ll,
		       &e->e_time, E.e_seq,
		       (u_int) PStat(tpcb, Nb_from_ll),
		       (u_int) E.e_datalen);
	}
#endif

	if (E.e_seq == tpcb->tp_rcvnxt) {

#ifdef ARGO_DEBUG
		if (argo_debug[D_STASH]) {
			printf("stash EQ: seq 0x%x datalen 0x%x eot 0x%x\n",
			     E.e_seq, E.e_datalen, E.e_eot);
		}
#endif

#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc, "stash EQ: seq len eot",
			  E.e_seq, E.e_datalen, E.e_eot, 0);
		}
#endif

		SET_DELACK(tpcb);

		sbappend(&tpcb->tp_sock->so_rcv, E.e_data);

		SEQ_INC(tpcb, tpcb->tp_rcvnxt);
		/*
		 * move chains from the reassembly queue to the socket buffer
		 */
		if (tpcb->tp_rsycnt) {
			struct mbuf **mp;
			struct mbuf   **mplim;

			mp = tpcb->tp_rsyq + (tpcb->tp_rcvnxt %
					      tpcb->tp_maxlcredit);
			mplim = tpcb->tp_rsyq + tpcb->tp_maxlcredit;

			while (tpcb->tp_rsycnt && *mp) {
				sbappend(&tpcb->tp_sock->so_rcv, *mp);
				tpcb->tp_rsycnt--;
				*mp = 0;
				SEQ_INC(tpcb, tpcb->tp_rcvnxt);
				ack_reason |= ACK_REORDER;
				if (++mp == mplim)
					mp = tpcb->tp_rsyq;
			}
		}
#ifdef ARGO_DEBUG
		if (argo_debug[D_STASH]) {
			dump_mbuf(tpcb->tp_sock->so_rcv.sb_mb,
			   "stash: so_rcv after appending");
		}
#endif

	} else {
		struct mbuf **mp;
		SeqNum          uwe;

#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc, "stash Reseq: seq rcvnxt lcdt",
				    E.e_seq, tpcb->tp_rcvnxt,
				    tpcb->tp_lcredit, 0);
		}
#endif

		if (tpcb->tp_rsyq == 0)
			tp_rsyset(tpcb);
		uwe = SEQ(tpcb, tpcb->tp_rcvnxt + tpcb->tp_maxlcredit);
		if (tpcb->tp_rsyq == 0 ||
		    !IN_RWINDOW(tpcb, E.e_seq, tpcb->tp_rcvnxt, uwe)) {
			ack_reason = ACK_DONT;
			m_freem(E.e_data);
		} else if (*(mp = tpcb->tp_rsyq +
			     (E.e_seq % tpcb->tp_maxlcredit)) != NULL ) {
#ifdef ARGO_DEBUG
			if (argo_debug[D_STASH]) {
				printf("tp_stash - drop & ack\n");
			}
#endif

			/*
			 * retransmission - drop it and force
			 * an ack
			 */
			IncStat(ts_dt_dup);
#ifdef TP_PERF_MEAS
			if (DOPERF(tpcb)) {
				IncPStat(tpcb, tps_n_ack_cuz_dup);
			}
#endif

				m_freem(E.e_data);
			ack_reason |= ACK_DUP;
		} else {
			*mp = E.e_data;
			tpcb->tp_rsycnt++;
			ack_reason = ACK_DONT;
		}
	}
	/*
	 * there were some comments of historical interest
	 * here.
	 */
	{
		LOCAL_CREDIT(tpcb);

		if (E.e_seq == tpcb->tp_sent_uwe)
			ack_reason |= ACK_STRAT_FULLWIN;

#ifdef TPPT
		if (tp_traceflags[D_STASH]) {
			tptraceTPCB(TPPTmisc,
		 "end of stash, eot, ack_reason, sent_uwe ",
		 E.e_eot, ack_reason, tpcb->tp_sent_uwe, 0);
		}
#endif

		if (ack_reason == ACK_DONT) {
			IncStat(ts_ackreason[ACK_DONT]);
			return 0;
		} else {
#ifdef TP_PERF_MEAS
			if (DOPERF(tpcb)) {
				if (ack_reason & ACK_STRAT_EACH) {
				IncPStat(tpcb, tps_n_ack_cuz_strat);
			} else if (ack_reason & ACK_STRAT_FULLWIN) {
				IncPStat(tpcb, tps_n_ack_cuz_fullwin);
			} else if (ack_reason & ACK_REORDER) {
				IncPStat(tpcb, tps_n_ack_cuz_reorder);
			}
			tpmeas(tpcb->tp_lref, TPtime_ack_sent, 0,
			   SEQ_ADD(tpcb, E.e_seq, 1), 0, 0);
			}
#endif
			{
				int    i;

				/*
				 * keep track of all reasons
				 * that apply
				 */
				for (i = 1; i < _ACK_NUM_REASONS_; i++) {
					if (ack_reason & (1 << i))
						IncStat(ts_ackreason[i]);
				}
			}
			return 1;
		}
	}
}

/*
 * tp_rsyflush - drop all the packets on the reassembly queue.
 * Do this when closing the socket, or when somebody has changed
 * the space avaible in the receive socket (XXX).
 */
void
tp_rsyflush(struct tp_pcb *tpcb)
{
	struct mbuf **mp;
	if (tpcb->tp_rsycnt) {
		for (mp = tpcb->tp_rsyq + tpcb->tp_maxlcredit;
		     --mp >= tpcb->tp_rsyq;)
			if (*mp) {
				tpcb->tp_rsycnt--;
				m_freem(*mp);
			}
		if (tpcb->tp_rsycnt) {
			printf("tp_rsyflush %p\n", tpcb);
			tpcb->tp_rsycnt = 0;
		}
	}
	free((void *) tpcb->tp_rsyq, M_PCB);
	tpcb->tp_rsyq = 0;
}

void
tp_rsyset(struct tp_pcb *tpcb)
{
	struct socket *so = tpcb->tp_sock;
	int             maxcredit = tpcb->tp_xtd_format ? 0xffff : 0xf;
	int             old_credit = tpcb->tp_maxlcredit;
	void *        rsyq;

	tpcb->tp_maxlcredit = maxcredit = min(maxcredit,
					      (so->so_rcv.sb_hiwat + tpcb->tp_l_tpdusize) / tpcb->tp_l_tpdusize);

	if (old_credit == tpcb->tp_maxlcredit && tpcb->tp_rsyq != 0)
		return;
	maxcredit *= sizeof(struct mbuf *);
	if (tpcb->tp_rsyq)
		tp_rsyflush(tpcb);
	if ((rsyq = (void *) malloc(maxcredit, M_PCB, M_NOWAIT)) != NULL)
		bzero(rsyq, maxcredit);
	tpcb->tp_rsyq = (struct mbuf **) rsyq;
}


void
tpsbcheck(struct tp_pcb *tpcb, int i)
{
	struct mbuf *n, *m;
	int    len = 0, mbcnt = 0, pktlen;
	struct sockbuf *sb = &tpcb->tp_sock->so_snd;

	for (n = sb->sb_mb; n; n = n->m_nextpkt) {
		if ((n->m_flags & M_PKTHDR) == 0)
			panic("tpsbcheck nohdr");
		pktlen = len + n->m_pkthdr.len;
		for (m = n; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT)
				mbcnt += m->m_ext.ext_size;
		}
		if (len != pktlen) {
			printf("test %d; len %d != pktlen %d on mbuf %p\n",
			       i, len, pktlen, n);
			panic("tpsbcheck short");
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("test %d: cc %d != %ld || mbcnt %d != %ld\n", i, len, sb->sb_cc,
		       mbcnt, sb->sb_mbcnt);
		panic("tpsbcheck");
	}
}