3afd44cf08
<20111022023242.BA26F14A158@mail.netbsd.org>. This change includes the following: An initial cleanup and minor reorganization of the entropy pool code in sys/dev/rnd.c and sys/dev/rndpool.c. Several bugs are fixed. Some effort is made to accumulate entropy more quickly at boot time. A generic interface, "rndsink", is added, for stream generators to request that they be re-keyed with good quality entropy from the pool as soon as it is available. The arc4random()/arc4randbytes() implementation in libkern is adjusted to use the rndsink interface for rekeying, which helps address the problem of low-quality keys at boot time. An implementation of the FIPS 140-2 statistical tests for random number generator quality is provided (libkern/rngtest.c). This is based on Greg Rose's implementation from Qualcomm. A new random stream generator, nist_ctr_drbg, is provided. It is based on an implementation of the NIST SP800-90 CTR_DRBG by Henric Jungheim. This generator uses AES in a modified counter mode to generate a backtracking-resistant random stream. An abstraction layer, "cprng", is provided for in-kernel consumers of randomness. The arc4random/arc4randbytes API is deprecated for in-kernel use. It is replaced by "cprng_strong". The current cprng_fast implementation wraps the existing arc4random implementation. The current cprng_strong implementation wraps the new CTR_DRBG implementation. Both interfaces are rekeyed from the entropy pool automatically at intervals justifiable from best current cryptographic practice. In some quick tests, cprng_fast() is about the same speed as the old arc4randbytes(), and cprng_strong() is about 20% faster than rnd_extract_data(). Performance is expected to improve. The AES code in src/crypto/rijndael is no longer an optional kernel component, as it is required by cprng_strong, which is not an optional kernel component. The entropy pool output is subjected to the rngtest tests at startup time; if it fails, the system will reboot.
There is approximately a 3/10000 chance of a false positive from these tests. Entropy pool _input_ from hardware random numbers is subjected to the rngtest tests at attach time, as well as the FIPS continuous-output test, to detect bad or stuck hardware RNGs; if any are detected, they are detached, but the system continues to run. A problem with rndctl(8) is fixed -- data structures with pointers in arrays are no longer passed to userspace (this was not a security problem, but rather a major issue for compat32). A new kernel will require a new rndctl. The sysctl kern.arandom() and kern.urandom() nodes are hooked up to the new generators, but the /dev/*random pseudodevices are not, yet. Manual pages for the new kernel interfaces are forthcoming.
673 lines
17 KiB
C
673 lines
17 KiB
C
/* $NetBSD: altq_blue.c,v 1.23 2011/11/19 22:51:18 tls Exp $ */
|
|
/* $KAME: altq_blue.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
|
|
|
|
/*
|
|
* Copyright (C) 1997-2002
|
|
* Sony Computer Science Laboratories Inc. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
*/
|
|
/*
|
|
* Copyright (c) 1990-1994 Regents of the University of California.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the Computer Systems
|
|
* Engineering Group at Lawrence Berkeley Laboratory.
|
|
* 4. Neither the name of the University nor of the Laboratory may be used
|
|
* to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__KERNEL_RCSID(0, "$NetBSD: altq_blue.c,v 1.23 2011/11/19 22:51:18 tls Exp $");
|
|
|
|
#ifdef _KERNEL_OPT
|
|
#include "opt_altq.h"
|
|
#include "opt_inet.h"
|
|
#endif
|
|
|
|
#ifdef ALTQ_BLUE /* blue is enabled by ALTQ_BLUE option in opt_altq.h */
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/sockio.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/errno.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/kauth.h>
|
|
#include <sys/cprng.h>
|
|
|
|
#include <net/if.h>
|
|
#include <net/if_types.h>
|
|
#include <netinet/in.h>
|
|
#include <netinet/in_systm.h>
|
|
#include <netinet/ip.h>
|
|
#ifdef INET6
|
|
#include <netinet/ip6.h>
|
|
#endif
|
|
|
|
#include <altq/altq.h>
|
|
#include <altq/altq_conf.h>
|
|
#include <altq/altq_blue.h>
|
|
|
|
#ifdef ALTQ3_COMPAT
|
|
/*
|
|
* Blue is proposed and implemented by Wu-chang Feng <wuchang@eecs.umich.edu>.
|
|
* more information on Blue is available from
|
|
* http://www.eecs.umich.edu/~wuchang/blue/
|
|
*/
|
|
|
|
/* fixed-point uses 12-bit decimal places */
|
|
#define FP_SHIFT 12 /* fixed-point shift */
|
|
|
|
#define BLUE_LIMIT 200 /* default max queue length */
|
|
#define BLUE_STATS /* collect statistics */
|
|
|
|
/* blue_list keeps all blue_queue_t's allocated. */
|
|
static blue_queue_t *blue_list = NULL;
|
|
|
|
/* internal function prototypes */
|
|
static int blue_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
|
|
static struct mbuf *blue_dequeue(struct ifaltq *, int);
|
|
static int drop_early(blue_t *);
|
|
static int mark_ecn(struct mbuf *, struct altq_pktattr *, int);
|
|
static int blue_detach(blue_queue_t *);
|
|
static int blue_request(struct ifaltq *, int, void *);
|
|
|
|
/*
|
|
* blue device interface
|
|
*/
|
|
altqdev_decl(blue);
|
|
|
|
/*
 * Open entry point of the blue device.
 *
 * Nothing is set up here: all state is created later, when the queueing
 * scheme is attached to an interface.  None of the arguments are used.
 * Always returns 0.
 */
int
blueopen(dev_t dev, int flag, int fmt, struct lwp *l)
{
	(void)dev;
	(void)flag;
	(void)fmt;
	(void)l;

	return 0;
}
|
|
|
|
int
|
|
blueclose(dev_t dev, int flag, int fmt,
|
|
struct lwp *l)
|
|
{
|
|
blue_queue_t *rqp;
|
|
int err, error = 0;
|
|
|
|
while ((rqp = blue_list) != NULL) {
|
|
/* destroy all */
|
|
err = blue_detach(rqp);
|
|
if (err != 0 && error == 0)
|
|
error = err;
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
int
|
|
blueioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
|
|
struct lwp *l)
|
|
{
|
|
blue_queue_t *rqp;
|
|
struct blue_interface *ifacep;
|
|
struct ifnet *ifp;
|
|
int error = 0;
|
|
|
|
/* check super-user privilege */
|
|
switch (cmd) {
|
|
case BLUE_GETSTATS:
|
|
break;
|
|
default:
|
|
#if (__FreeBSD_version > 400000)
|
|
if ((error = suser(p)) != 0)
|
|
return (error);
|
|
#else
|
|
if ((error = kauth_authorize_network(l->l_cred,
|
|
KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_BLUE, NULL,
|
|
NULL, NULL)) != 0)
|
|
return (error);
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
switch (cmd) {
|
|
|
|
case BLUE_ENABLE:
|
|
ifacep = (struct blue_interface *)addr;
|
|
if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
|
|
error = EBADF;
|
|
break;
|
|
}
|
|
error = altq_enable(rqp->rq_ifq);
|
|
break;
|
|
|
|
case BLUE_DISABLE:
|
|
ifacep = (struct blue_interface *)addr;
|
|
if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
|
|
error = EBADF;
|
|
break;
|
|
}
|
|
error = altq_disable(rqp->rq_ifq);
|
|
break;
|
|
|
|
case BLUE_IF_ATTACH:
|
|
ifp = ifunit(((struct blue_interface *)addr)->blue_ifname);
|
|
if (ifp == NULL) {
|
|
error = ENXIO;
|
|
break;
|
|
}
|
|
|
|
/* allocate and initialize blue_state_t */
|
|
rqp = malloc(sizeof(blue_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
|
|
if (rqp == NULL) {
|
|
error = ENOMEM;
|
|
break;
|
|
}
|
|
|
|
rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
|
|
M_WAITOK|M_ZERO);
|
|
if (rqp->rq_q == NULL) {
|
|
free(rqp, M_DEVBUF);
|
|
error = ENOMEM;
|
|
break;
|
|
}
|
|
|
|
rqp->rq_blue = malloc(sizeof(blue_t), M_DEVBUF,
|
|
M_WAITOK|M_ZERO);
|
|
if (rqp->rq_blue == NULL) {
|
|
free(rqp->rq_q, M_DEVBUF);
|
|
free(rqp, M_DEVBUF);
|
|
error = ENOMEM;
|
|
break;
|
|
}
|
|
|
|
rqp->rq_ifq = &ifp->if_snd;
|
|
qtail(rqp->rq_q) = NULL;
|
|
qlen(rqp->rq_q) = 0;
|
|
qlimit(rqp->rq_q) = BLUE_LIMIT;
|
|
|
|
/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
|
|
blue_init(rqp->rq_blue, 0, 800, 1000, 50000);
|
|
|
|
/*
|
|
* set BLUE to this ifnet structure.
|
|
*/
|
|
error = altq_attach(rqp->rq_ifq, ALTQT_BLUE, rqp,
|
|
blue_enqueue, blue_dequeue, blue_request,
|
|
NULL, NULL);
|
|
if (error) {
|
|
free(rqp->rq_blue, M_DEVBUF);
|
|
free(rqp->rq_q, M_DEVBUF);
|
|
free(rqp, M_DEVBUF);
|
|
break;
|
|
}
|
|
|
|
/* add this state to the blue list */
|
|
rqp->rq_next = blue_list;
|
|
blue_list = rqp;
|
|
break;
|
|
|
|
case BLUE_IF_DETACH:
|
|
ifacep = (struct blue_interface *)addr;
|
|
if ((rqp = altq_lookup(ifacep->blue_ifname, ALTQT_BLUE)) == NULL) {
|
|
error = EBADF;
|
|
break;
|
|
}
|
|
error = blue_detach(rqp);
|
|
break;
|
|
|
|
case BLUE_GETSTATS:
|
|
do {
|
|
struct blue_stats *q_stats;
|
|
blue_t *rp;
|
|
|
|
q_stats = (struct blue_stats *)addr;
|
|
if ((rqp = altq_lookup(q_stats->iface.blue_ifname,
|
|
ALTQT_BLUE)) == NULL) {
|
|
error = EBADF;
|
|
break;
|
|
}
|
|
|
|
q_stats->q_len = qlen(rqp->rq_q);
|
|
q_stats->q_limit = qlimit(rqp->rq_q);
|
|
|
|
rp = rqp->rq_blue;
|
|
q_stats->q_pmark = rp->blue_pmark;
|
|
q_stats->xmit_packets = rp->blue_stats.xmit_packets;
|
|
q_stats->xmit_bytes = rp->blue_stats.xmit_bytes;
|
|
q_stats->drop_packets = rp->blue_stats.drop_packets;
|
|
q_stats->drop_bytes = rp->blue_stats.drop_bytes;
|
|
q_stats->drop_forced = rp->blue_stats.drop_forced;
|
|
q_stats->drop_unforced = rp->blue_stats.drop_unforced;
|
|
q_stats->marked_packets = rp->blue_stats.marked_packets;
|
|
|
|
} while (/*CONSTCOND*/ 0);
|
|
break;
|
|
|
|
case BLUE_CONFIG:
|
|
do {
|
|
struct blue_conf *fc;
|
|
int limit;
|
|
|
|
fc = (struct blue_conf *)addr;
|
|
if ((rqp = altq_lookup(fc->iface.blue_ifname,
|
|
ALTQT_BLUE)) == NULL) {
|
|
error = EBADF;
|
|
break;
|
|
}
|
|
limit = fc->blue_limit;
|
|
qlimit(rqp->rq_q) = limit;
|
|
fc->blue_limit = limit; /* write back the new value */
|
|
if (fc->blue_pkttime > 0)
|
|
rqp->rq_blue->blue_pkttime = fc->blue_pkttime;
|
|
if (fc->blue_max_pmark > 0)
|
|
rqp->rq_blue->blue_max_pmark = fc->blue_max_pmark;
|
|
if (fc->blue_hold_time > 0)
|
|
rqp->rq_blue->blue_hold_time = fc->blue_hold_time;
|
|
rqp->rq_blue->blue_flags = fc->blue_flags;
|
|
|
|
blue_init(rqp->rq_blue, rqp->rq_blue->blue_flags,
|
|
rqp->rq_blue->blue_pkttime,
|
|
rqp->rq_blue->blue_max_pmark,
|
|
rqp->rq_blue->blue_hold_time);
|
|
} while (/*CONSTCOND*/ 0);
|
|
break;
|
|
|
|
default:
|
|
error = EINVAL;
|
|
break;
|
|
}
|
|
return error;
|
|
}
|
|
|
|
static int
|
|
blue_detach(blue_queue_t *rqp)
|
|
{
|
|
blue_queue_t *tmp;
|
|
int error = 0;
|
|
|
|
if (ALTQ_IS_ENABLED(rqp->rq_ifq))
|
|
altq_disable(rqp->rq_ifq);
|
|
|
|
if ((error = altq_detach(rqp->rq_ifq)))
|
|
return (error);
|
|
|
|
if (blue_list == rqp)
|
|
blue_list = rqp->rq_next;
|
|
else {
|
|
for (tmp = blue_list; tmp != NULL; tmp = tmp->rq_next)
|
|
if (tmp->rq_next == rqp) {
|
|
tmp->rq_next = rqp->rq_next;
|
|
break;
|
|
}
|
|
if (tmp == NULL)
|
|
printf("blue_detach: no state found in blue_list!\n");
|
|
}
|
|
|
|
free(rqp->rq_q, M_DEVBUF);
|
|
free(rqp->rq_blue, M_DEVBUF);
|
|
free(rqp, M_DEVBUF);
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* blue support routines
|
|
*/
|
|
|
|
int
|
|
blue_init(blue_t *rp, int flags, int pkttime, int blue_max_pmark,
|
|
int blue_hold_time)
|
|
{
|
|
int npkts_per_sec;
|
|
|
|
rp->blue_idle = 1;
|
|
rp->blue_flags = flags;
|
|
rp->blue_pkttime = pkttime;
|
|
rp->blue_max_pmark = blue_max_pmark;
|
|
rp->blue_hold_time = blue_hold_time;
|
|
if (pkttime == 0)
|
|
rp->blue_pkttime = 1;
|
|
|
|
/* when the link is very slow, adjust blue parameters */
|
|
npkts_per_sec = 1000000 / rp->blue_pkttime;
|
|
if (npkts_per_sec < 50) {
|
|
}
|
|
else if (npkts_per_sec < 300) {
|
|
}
|
|
|
|
microtime(&rp->blue_last);
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* enqueue routine:
|
|
*
|
|
* returns: 0 when successfully queued.
|
|
* ENOBUFS when drop occurs.
|
|
*/
|
|
static int
|
|
blue_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
|
|
{
|
|
blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
|
|
int error = 0;
|
|
|
|
if (blue_addq(rqp->rq_blue, rqp->rq_q, m, pktattr) == 0)
|
|
ifq->ifq_len++;
|
|
else
|
|
error = ENOBUFS;
|
|
return error;
|
|
}
|
|
|
|
#define DTYPE_NODROP 0 /* no drop */
|
|
#define DTYPE_FORCED 1 /* a "forced" drop */
|
|
#define DTYPE_EARLY 2 /* an "unforced" (early) drop */
|
|
|
|
int
|
|
blue_addq(blue_t *rp, class_queue_t *q, struct mbuf *m,
|
|
struct altq_pktattr *pktattr)
|
|
{
|
|
int droptype;
|
|
|
|
/*
|
|
* if we were idle, this is an enqueue onto an empty queue
|
|
* and we should decrement marking probability
|
|
*
|
|
*/
|
|
if (rp->blue_idle) {
|
|
struct timeval now;
|
|
int t;
|
|
rp->blue_idle = 0;
|
|
microtime(&now);
|
|
t = (now.tv_sec - rp->blue_last.tv_sec);
|
|
if ( t > 1) {
|
|
rp->blue_pmark = 1;
|
|
microtime(&rp->blue_last);
|
|
} else {
|
|
t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
|
|
if (t > rp->blue_hold_time) {
|
|
rp->blue_pmark--;
|
|
if (rp->blue_pmark < 0) rp->blue_pmark = 0;
|
|
microtime(&rp->blue_last);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* see if we drop early */
|
|
droptype = DTYPE_NODROP;
|
|
if (drop_early(rp) && qlen(q) > 1) {
|
|
/* mark or drop by blue */
|
|
if ((rp->blue_flags & BLUEF_ECN) &&
|
|
mark_ecn(m, pktattr, rp->blue_flags)) {
|
|
/* successfully marked. do not drop. */
|
|
#ifdef BLUE_STATS
|
|
rp->blue_stats.marked_packets++;
|
|
#endif
|
|
} else {
|
|
/* unforced drop by blue */
|
|
droptype = DTYPE_EARLY;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* if the queue length hits the hard limit, it's a forced drop.
|
|
*/
|
|
if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
|
|
droptype = DTYPE_FORCED;
|
|
|
|
/* if successful or forced drop, enqueue this packet. */
|
|
if (droptype != DTYPE_EARLY)
|
|
_addq(q, m);
|
|
|
|
if (droptype != DTYPE_NODROP) {
|
|
if (droptype == DTYPE_EARLY) {
|
|
/* drop the incoming packet */
|
|
#ifdef BLUE_STATS
|
|
rp->blue_stats.drop_unforced++;
|
|
#endif
|
|
} else {
|
|
struct timeval now;
|
|
int t;
|
|
/* forced drop, select a victim packet in the queue. */
|
|
m = _getq_random(q);
|
|
microtime(&now);
|
|
t = (now.tv_sec - rp->blue_last.tv_sec);
|
|
t = t * 1000000 + (now.tv_usec - rp->blue_last.tv_usec);
|
|
if (t > rp->blue_hold_time) {
|
|
rp->blue_pmark += rp->blue_max_pmark >> 3;
|
|
if (rp->blue_pmark > rp->blue_max_pmark)
|
|
rp->blue_pmark = rp->blue_max_pmark;
|
|
microtime(&rp->blue_last);
|
|
}
|
|
#ifdef BLUE_STATS
|
|
rp->blue_stats.drop_forced++;
|
|
#endif
|
|
}
|
|
#ifdef BLUE_STATS
|
|
rp->blue_stats.drop_packets++;
|
|
rp->blue_stats.drop_bytes += m->m_pkthdr.len;
|
|
#endif
|
|
m_freem(m);
|
|
return (-1);
|
|
}
|
|
/* successfully queued */
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* early-drop probability is kept in blue_pmark
|
|
*
|
|
*/
|
|
static int
|
|
drop_early(blue_t *rp)
|
|
{
|
|
if ((cprng_fast32() % rp->blue_max_pmark) < rp->blue_pmark) {
|
|
/* drop or mark */
|
|
return (1);
|
|
}
|
|
/* no drop/mark */
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* try to mark CE bit to the packet.
|
|
* returns 1 if successfully marked, 0 otherwise.
|
|
*/
|
|
static int
|
|
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
|
|
{
|
|
struct mbuf *m0;
|
|
|
|
if (pktattr == NULL ||
|
|
(pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
|
|
return (0);
|
|
|
|
/* verify that pattr_hdr is within the mbuf data */
|
|
for (m0 = m; m0 != NULL; m0 = m0->m_next)
|
|
if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
|
|
((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
|
|
break;
|
|
if (m0 == NULL) {
|
|
/* ick, pattr_hdr is stale */
|
|
pktattr->pattr_af = AF_UNSPEC;
|
|
return (0);
|
|
}
|
|
|
|
switch (pktattr->pattr_af) {
|
|
case AF_INET:
|
|
if (flags & BLUEF_ECN4) {
|
|
struct ip *ip = (struct ip *)pktattr->pattr_hdr;
|
|
u_int8_t otos;
|
|
int sum;
|
|
|
|
if (ip->ip_v != 4)
|
|
return (0); /* version mismatch! */
|
|
if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
|
|
return (0); /* not-ECT */
|
|
if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
|
|
return (1); /* already marked */
|
|
|
|
/*
|
|
* ecn-capable but not marked,
|
|
* mark CE and update checksum
|
|
*/
|
|
otos = ip->ip_tos;
|
|
ip->ip_tos |= IPTOS_ECN_CE;
|
|
/*
|
|
* update checksum (from RFC1624)
|
|
* HC' = ~(~HC + ~m + m')
|
|
*/
|
|
sum = ~ntohs(ip->ip_sum) & 0xffff;
|
|
sum += (~otos & 0xffff) + ip->ip_tos;
|
|
sum = (sum >> 16) + (sum & 0xffff);
|
|
sum += (sum >> 16); /* add carry */
|
|
ip->ip_sum = htons(~sum & 0xffff);
|
|
return (1);
|
|
}
|
|
break;
|
|
#ifdef INET6
|
|
case AF_INET6:
|
|
if (flags & BLUEF_ECN6) {
|
|
struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
|
|
u_int32_t flowlabel;
|
|
|
|
flowlabel = ntohl(ip6->ip6_flow);
|
|
if ((flowlabel >> 28) != 6)
|
|
return (0); /* version mismatch! */
|
|
if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
|
|
(IPTOS_ECN_NOTECT << 20))
|
|
return (0); /* not-ECT */
|
|
if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
|
|
(IPTOS_ECN_CE << 20))
|
|
return (1); /* already marked */
|
|
/*
|
|
* ecn-capable but not marked, mark CE
|
|
*/
|
|
flowlabel |= (IPTOS_ECN_CE << 20);
|
|
ip6->ip6_flow = htonl(flowlabel);
|
|
return (1);
|
|
}
|
|
break;
|
|
#endif /* INET6 */
|
|
}
|
|
|
|
/* not marked */
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* dequeue routine:
|
|
* must be called in splnet.
|
|
*
|
|
* returns: mbuf dequeued.
|
|
* NULL when no packet is available in the queue.
|
|
*/
|
|
|
|
static struct mbuf *
|
|
blue_dequeue(struct ifaltq * ifq, int op)
|
|
{
|
|
blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
|
|
struct mbuf *m = NULL;
|
|
|
|
if (op == ALTDQ_POLL)
|
|
return (qhead(rqp->rq_q));
|
|
|
|
m = blue_getq(rqp->rq_blue, rqp->rq_q);
|
|
if (m != NULL)
|
|
ifq->ifq_len--;
|
|
return m;
|
|
}
|
|
|
|
struct mbuf *
|
|
blue_getq(blue_t *rp, class_queue_t *q)
|
|
{
|
|
struct mbuf *m;
|
|
|
|
if ((m = _getq(q)) == NULL) {
|
|
if (rp->blue_idle == 0) {
|
|
rp->blue_idle = 1;
|
|
microtime(&rp->blue_last);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
rp->blue_idle = 0;
|
|
#ifdef BLUE_STATS
|
|
rp->blue_stats.xmit_packets++;
|
|
rp->blue_stats.xmit_bytes += m->m_pkthdr.len;
|
|
#endif
|
|
return (m);
|
|
}
|
|
|
|
static int
|
|
blue_request(struct ifaltq *ifq, int req, void *arg)
|
|
{
|
|
blue_queue_t *rqp = (blue_queue_t *)ifq->altq_disc;
|
|
|
|
switch (req) {
|
|
case ALTRQ_PURGE:
|
|
_flushq(rqp->rq_q);
|
|
if (ALTQ_IS_ENABLED(ifq))
|
|
ifq->ifq_len = 0;
|
|
break;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
|
|
#ifdef KLD_MODULE
|
|
|
|
static struct altqsw blue_sw =
|
|
{"blue", blueopen, blueclose, blueioctl};
|
|
|
|
ALTQ_MODULE(altq_blue, ALTQT_BLUE, &blue_sw);
|
|
|
|
#endif /* KLD_MODULE */
|
|
|
|
#endif /* ALTQ3_COMPAT */
|
|
#endif /* ALTQ_BLUE */
|