NetBSD/sys/altq/altq_cdnr.c
tls 3afd44cf08 First step of random number subsystem rework described in
<20111022023242.BA26F14A158@mail.netbsd.org>.  This change includes
the following:

	An initial cleanup and minor reorganization of the entropy pool
	code in sys/dev/rnd.c and sys/dev/rndpool.c.  Several bugs are
	fixed.  Some effort is made to accumulate entropy more quickly at
	boot time.

	A generic interface, "rndsink", is added, for stream generators to
	request that they be re-keyed with good quality entropy from the pool
	as soon as it is available.

	The arc4random()/arc4randbytes() implementation in libkern is
	adjusted to use the rndsink interface for rekeying, which helps
	address the problem of low-quality keys at boot time.

	An implementation of the FIPS 140-2 statistical tests for random
	number generator quality is provided (libkern/rngtest.c).  This
	is based on Greg Rose's implementation from Qualcomm.

	A new random stream generator, nist_ctr_drbg, is provided.  It is
	based on an implementation of the NIST SP800-90 CTR_DRBG by
	Henric Jungheim.  This generator uses AES in a modified counter
	mode to generate a backtracking-resistant random stream.

	An abstraction layer, "cprng", is provided for in-kernel consumers
	of randomness.  The arc4random/arc4randbytes API is deprecated for
	in-kernel use.  It is replaced by "cprng_strong".  The current
	cprng_fast implementation wraps the existing arc4random
	implementation.  The current cprng_strong implementation wraps the
	new CTR_DRBG implementation.  Both interfaces are rekeyed from
	the entropy pool automatically at intervals justifiable from best
	current cryptographic practice.

	In some quick tests, cprng_fast() is about the same speed as
	the old arc4randbytes(), and cprng_strong() is about 20% faster
	than rnd_extract_data().  Performance is expected to improve.

	The AES code in src/crypto/rijndael is no longer an optional
	kernel component, as it is required by cprng_strong, which is
	not an optional kernel component.

	The entropy pool output is subjected to the rngtest tests at
	startup time; if it fails, the system will reboot.  There is
	approximately a 3/10000 chance of a false positive from these
	tests.  Entropy pool _input_ from hardware random numbers is
	subjected to the rngtest tests at attach time, as well as the
	FIPS continuous-output test, to detect bad or stuck hardware
	RNGs; if any are detected, they are detached, but the system
	continues to run.

	A problem with rndctl(8) is fixed -- datastructures with
	pointers in arrays are no longer passed to userspace (this
	was not a security problem, but rather a major issue for
	compat32).  A new kernel will require a new rndctl.

	The sysctl kern.arandom() and kern.urandom() nodes are hooked
	up to the new generators, but the /dev/*random pseudodevices
	are not, yet.

	Manual pages for the new kernel interfaces are forthcoming.
2011-11-19 22:51:18 +00:00

1315 lines
30 KiB
C

/* $NetBSD: altq_cdnr.c,v 1.20 2011/11/19 22:51:18 tls Exp $ */
/* $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $ */
/*
* Copyright (C) 1999-2002
* Sony Computer Science Laboratories Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_cdnr.c,v 1.20 2011/11/19 22:51:18 tls Exp $");
#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#endif
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/kauth.h>
#include <sys/cprng.h>
#include <net/if.h>
#include <net/if_types.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_cdnr.h>
#ifdef ALTQ3_COMPAT
/*
* diffserv traffic conditioning module
*/
/* global flag; initialised to 0 here and not modified anywhere in this file */
int altq_cdnr_enabled = 0;
/* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
#ifdef ALTQ_CDNR
/*
* tcb_list links every top-level conditioner (struct top_cdnr) that has
* been created by top_create(); entries are removed in top_destroy().
*/
static LIST_HEAD(, top_cdnr) tcb_list;
/* packet input hook and lookup helpers */
static int altq_cdnr_input(struct mbuf *, int);
static struct top_cdnr *tcb_lookup(char *ifname);
static struct cdnr_block *cdnr_handle2cb(u_long);
static u_long cdnr_cb2handle(struct cdnr_block *);
/* generic conditioner block allocation / release */
static void *cdnr_cballoc(struct top_cdnr *, int,
struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
static void cdnr_cbdestroy(void *);
/* validation and import of user supplied actions */
static int tca_verify_action(struct tc_action *);
static void tca_import_action(struct tc_action *, struct tc_action *);
static void tca_invalidate_action(struct tc_action *);
static int generic_element_destroy(struct cdnr_block *);
/* top level conditioner and simple elements */
static struct top_cdnr *top_create(struct ifaltq *);
static int top_destroy(struct top_cdnr *);
static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
static int element_destroy(struct cdnr_block *);
/* token bucket meter */
static void tb_import_profile(struct tbe *, struct tb_profile *);
static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
struct tc_action *, struct tc_action *);
static int tbm_destroy(struct tbmeter *);
static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
/* two rate three color marker */
static struct trtcm *trtcm_create(struct top_cdnr *,
struct tb_profile *, struct tb_profile *,
struct tc_action *, struct tc_action *, struct tc_action *,
int);
static int trtcm_destroy(struct trtcm *);
static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
/* time sliding window three color marker */
static struct tswtcm *tswtcm_create(struct top_cdnr *,
u_int32_t, u_int32_t, u_int32_t,
struct tc_action *, struct tc_action *, struct tc_action *);
static int tswtcm_destroy(struct tswtcm *);
static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
/* ioctl command handlers, dispatched from cdnrioctl() */
static int cdnrcmd_if_attach(char *);
static int cdnrcmd_if_detach(char *);
static int cdnrcmd_add_element(struct cdnr_add_element *);
static int cdnrcmd_delete_element(struct cdnr_delete_element *);
static int cdnrcmd_add_filter(struct cdnr_add_filter *);
static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
static int cdnrcmd_get_stats(struct cdnr_get_stats *);
/* device entry point declarations (cdnropen/cdnrclose/cdnrioctl are defined below) */
altqdev_decl(cdnr);
/*
 * top level input function called from ip_input.
 * should be called before converting header fields to host-byte-order.
 *
 * The packet is classified with the interface's classifier; if no filter
 * matches, the default action of the top level conditioner is used.  The
 * action chain is then followed until a terminal action is reached.
 * Every action visited is accounted in top->tc_cnts[].
 *
 * Returns 1 for TCACODE_PASS, TCACODE_MARK, TCACODE_NONE, unknown codes
 * and when conditioning is not enabled on the receiving interface.
 * Returns 0 for TCACODE_DROP (the mbuf is freed here) and for
 * TCACODE_RETURN (the mbuf is not freed here).
 */
static int
altq_cdnr_input(struct mbuf *m, int af)
{
	struct ifnet *ifp;
	struct ip *ip;
	struct top_cdnr *top;
	struct tc_action *tca;
	struct cdnr_block *cb;
	struct cdnr_pktinfo pktinfo;

	ifp = m->m_pkthdr.rcvif;
	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
		/* traffic conditioner is not enabled on this interface */
		return (1);

	top = ifp->if_snd.altq_cdnr;

	ip = mtod(m, struct ip *);
#ifdef INET6
	if (af == AF_INET6) {
		u_int32_t flowlabel;

		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
	} else
#endif
		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
	pktinfo.pkt_len = m_pktlen(m);

	/* pick the starting action: matching filter first, default otherwise */
	tca = NULL;
	cb = acc_classify(&top->tc_classifier, m, af);
	if (cb != NULL)
		tca = &cb->cb_action;
	if (tca == NULL)
		tca = &top->tc_block.cb_action;

	while (1) {
		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);

		switch (tca->tca_code) {
		case TCACODE_PASS:
			return (1);
		case TCACODE_DROP:
			m_freem(m);
			return (0);
		case TCACODE_RETURN:
			return (0);
		case TCACODE_MARK:
#ifdef INET6
			if (af == AF_INET6) {
				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
				u_int32_t flowlabel;

				flowlabel = ntohl(ip6->ip6_flow);
				flowlabel = (tca->tca_dscp << 20) |
				    (flowlabel & ~(DSCP_MASK << 20));
				ip6->ip6_flow = htonl(flowlabel);
			} else
#endif
				ip->ip_tos = tca->tca_dscp |
				    (ip->ip_tos & DSCP_CUMASK);
			return (1);
		case TCACODE_NEXT:
			cb = tca->tca_next;
			/*
			 * Blocks created without an input function (simple
			 * elements, see cdnr_cballoc()) have cb_input == NULL
			 * but can still be the target of a handle action.
			 * Continue with the block's own action in that case
			 * instead of calling through a NULL pointer.
			 */
			if (cb->cb_input != NULL)
				tca = (*cb->cb_input)(cb, &pktinfo);
			else
				tca = &cb->cb_action;
			break;
		case TCACODE_NONE:
		default:
			return (1);
		}
	}
}
static struct top_cdnr *
tcb_lookup(char *ifname)
{
struct top_cdnr *top;
struct ifnet *ifp;
if ((ifp = ifunit(ifname)) != NULL)
LIST_FOREACH(top, &tcb_list, tc_next)
if (top->tc_ifq->altq_ifp == ifp)
return (top);
return (NULL);
}
/*
 * Translate a user supplied handle into a conditioner block.
 *
 * A handle is the kernel address of the block (see cdnr_cballoc()), but
 * it comes from user space and must never be dereferenced before it has
 * been validated.  The handle is therefore compared against the blocks
 * that are actually linked from tcb_list: every top level conditioner
 * and every element registered on its tc_elements list.
 *
 * Returns the matching block, or NULL if the handle does not refer to
 * a live block.
 */
static struct cdnr_block *
cdnr_handle2cb(u_long handle)
{
	struct top_cdnr *top;
	struct cdnr_block *cb;

	if (handle == 0)
		return (NULL);

	LIST_FOREACH(top, &tcb_list, tc_next) {
		if (top->tc_block.cb_handle == handle)
			return (&top->tc_block);
		LIST_FOREACH(cb, &top->tc_elements, cb_next) {
			if (cb->cb_handle == handle)
				return (cb);
		}
	}
	return (NULL);
}
static u_long
cdnr_cb2handle(struct cdnr_block *cb)
{
return (cb->cb_handle);
}
static void *
cdnr_cballoc(struct top_cdnr *top, int type, struct tc_action *(*input_func)(
struct cdnr_block *, struct cdnr_pktinfo *))
{
struct cdnr_block *cb;
int size;
switch (type) {
case TCETYPE_TOP:
size = sizeof(struct top_cdnr);
break;
case TCETYPE_ELEMENT:
size = sizeof(struct cdnr_block);
break;
case TCETYPE_TBMETER:
size = sizeof(struct tbmeter);
break;
case TCETYPE_TRTCM:
size = sizeof(struct trtcm);
break;
case TCETYPE_TSWTCM:
size = sizeof(struct tswtcm);
break;
default:
return (NULL);
}
cb = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
if (cb == NULL)
return (NULL);
cb->cb_len = size;
cb->cb_type = type;
cb->cb_ref = 0;
cb->cb_handle = (u_long)cb;
if (top == NULL)
cb->cb_top = (struct top_cdnr *)cb;
else
cb->cb_top = top;
if (input_func != NULL) {
/*
* if this cdnr has an action function,
* make tc_action to call itself.
*/
cb->cb_action.tca_code = TCACODE_NEXT;
cb->cb_action.tca_next = cb;
cb->cb_input = input_func;
} else
cb->cb_action.tca_code = TCACODE_NONE;
/* if this isn't top, register the element to the top level cdnr */
if (top != NULL)
LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
return ((void *)cb);
}
static void
cdnr_cbdestroy(void *cblock)
{
struct cdnr_block *cb = cblock;
/* delete filters belonging to this cdnr */
acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
/* remove from the top level cdnr */
if (cb->cb_top != cblock)
LIST_REMOVE(cb, cb_next);
free(cb, M_DEVBUF);
}
/*
 * conditioner common destroy routine
 *
 * Dispatches on cb_type to the type specific destroy function and
 * returns its result; an unknown type yields EINVAL.
 */
static int
generic_element_destroy(struct cdnr_block *cb)
{
	switch (cb->cb_type) {
	case TCETYPE_TOP:
		return (top_destroy((struct top_cdnr *)cb));
	case TCETYPE_ELEMENT:
		return (element_destroy(cb));
	case TCETYPE_TBMETER:
		return (tbm_destroy((struct tbmeter *)cb));
	case TCETYPE_TRTCM:
		return (trtcm_destroy((struct trtcm *)cb));
	case TCETYPE_TSWTCM:
		return (tswtcm_destroy((struct tswtcm *)cb));
	default:
		return (EINVAL);
	}
}
/*
 * Check an action passed in from user space.
 *
 * PASS, DROP and MARK are accepted as they are; HANDLE is accepted only
 * if the handle resolves to a block.  Every other code (including NONE,
 * RETURN and NEXT) must not come from a user and is rejected.
 *
 * Returns 0 when the action is acceptable, -1 otherwise.
 */
static int
tca_verify_action(struct tc_action *utca)
{
	switch (utca->tca_code) {
	case TCACODE_HANDLE:
		/* verify handle value */
		if (cdnr_handle2cb(utca->tca_handle) == NULL)
			return (-1);
		return (0);

	case TCACODE_PASS:
	case TCACODE_DROP:
	case TCACODE_MARK:
		return (0);

	default:
		return (-1);
	}
}
/*
 * Copy a user supplied action into its kernel representation.
 *
 * A HANDLE action is turned into a NEXT action pointing at the resolved
 * block, whose reference count is incremented; if the handle cannot be
 * resolved the action becomes NONE.  A MARK action has its dscp value
 * masked with DSCP_MASK.  Other codes are copied unchanged.
 */
static void
tca_import_action(struct tc_action *ktca, struct tc_action *utca)
{
	struct cdnr_block *target;

	*ktca = *utca;

	switch (ktca->tca_code) {
	case TCACODE_HANDLE:
		target = cdnr_handle2cb(ktca->tca_handle);
		if (target == NULL) {
			ktca->tca_code = TCACODE_NONE;
			break;
		}
		ktca->tca_code = TCACODE_NEXT;
		ktca->tca_next = target;
		target->cb_ref++;
		break;

	case TCACODE_MARK:
		ktca->tca_dscp &= DSCP_MASK;
		break;

	default:
		break;
	}
}
/*
 * Drop the reference held by an action and reset it to NONE.
 *
 * For a NEXT action the target's reference count is decremented; a NEXT
 * action with a NULL target is left untouched.
 */
static void
tca_invalidate_action(struct tc_action *tca)
{
	if (tca->tca_code != TCACODE_NEXT) {
		tca->tca_code = TCACODE_NONE;
		return;
	}

	if (tca->tca_next == NULL)
		return;

	tca->tca_next->cb_ref--;
	tca->tca_code = TCACODE_NONE;
}
/*
* top level traffic conditioner
*/
/*
 * Create the top level conditioner for the interface queue `ifq', give
 * it a default action of PASS, put it on tcb_list and attach it to the
 * queue.  Returns NULL when the block cannot be allocated.
 */
static struct top_cdnr *
top_create(struct ifaltq *ifq)
{
	struct top_cdnr *newtop;

	newtop = cdnr_cballoc(NULL, TCETYPE_TOP, NULL);
	if (newtop == NULL)
		return (NULL);

	/* set default action for the top level conditioner */
	newtop->tc_block.cb_action.tca_code = TCACODE_PASS;
	newtop->tc_ifq = ifq;

	LIST_INSERT_HEAD(&tcb_list, newtop, tc_next);
	ifq->altq_cdnr = newtop;

	return (newtop);
}
/*
* Tear down a top level conditioner: stop conditioning on its interface
* queue, destroy every element registered with it, unlink it from
* tcb_list and free it.  Afterwards the altq_input hook is cleared if no
* remaining conditioner is enabled.  Always returns 0.
*/
static int
top_destroy(struct top_cdnr *top)
{
struct cdnr_block *cb;
if (ALTQ_IS_CNDTNING(top->tc_ifq))
ALTQ_CLEAR_CNDTNING(top->tc_ifq);
top->tc_ifq->altq_cdnr = NULL;
/*
* destroy all the conditioner elements belonging to this interface
*/
/*
* Each pass picks the first element whose reference count is zero, so
* that elements are destroyed before the elements they refer to.
* NOTE(review): if every remaining element had cb_ref > 0, or if
* generic_element_destroy() failed, this loop would not make progress
* -- confirm that cannot happen.
*/
while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
while (cb != NULL && cb->cb_ref > 0)
cb = LIST_NEXT(cb, cb_next);
if (cb != NULL)
generic_element_destroy(cb);
}
LIST_REMOVE(top, tc_next);
cdnr_cbdestroy(top);
/* if there is no active conditioner, remove the input hook */
if (altq_input != NULL) {
/* `top' was freed above; from here on it is only a list cursor */
LIST_FOREACH(top, &tcb_list, tc_next)
if (ALTQ_IS_CNDTNING(top->tc_ifq))
break;
if (top == NULL)
altq_input = NULL;
}
return (0);
}
/*
* simple tc elements without input function (e.g., dropper and markers).
*/
/*
 * Create a simple element (no input function) under `top' whose action
 * is imported from `action'.  Returns NULL when the action fails
 * verification or the block cannot be allocated.
 */
static struct cdnr_block *
element_create(struct top_cdnr *top, struct tc_action *action)
{
	struct cdnr_block *elem;

	if (tca_verify_action(action) < 0)
		return (NULL);

	elem = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL);
	if (elem != NULL)
		tca_import_action(&elem->cb_action, action);

	return (elem);
}
/*
 * Destroy a simple element.  Returns EBUSY while other actions still
 * refer to it, 0 once it has been released.
 */
static int
element_destroy(struct cdnr_block *cb)
{
	if (cb->cb_ref <= 0) {
		tca_invalidate_action(&cb->cb_action);
		cdnr_cbdestroy(cb);
		return (0);
	}
	return (EBUSY);
}
/*
* internal representation of token bucket parameters
* rate: byte_per_unittime << 32
* (((bits_per_sec) / 8) << 32) / machclk_freq
* depth: byte << 32
*
*/
#define TB_SHIFT 32
#define TB_SCALE(x) ((u_int64_t)(x) << TB_SHIFT)
#define TB_UNSCALE(x) ((x) >> TB_SHIFT)

/*
 * Load a token bucket from a user profile.
 *
 * The rate is converted from bits per second to scaled bytes per machine
 * clock tick and the depth to scaled bytes.  filluptime is the number of
 * ticks needed to fill an empty bucket, or the all-ones value when the
 * rate is zero.  The bucket starts full, stamped with the current clock.
 */
static void
tb_import_profile(struct tbe *tb, struct tb_profile *profile)
{
	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
	tb->depth = TB_SCALE(profile->depth);

	if (tb->rate > 0) {
		tb->filluptime = tb->depth / tb->rate;
	} else {
		tb->filluptime = 0xffffffffffffffffLL;
	}

	tb->last = read_machclk();
	tb->token = tb->depth;
}
/*
* simple token bucket meter
*/
/*
 * Create a token bucket meter under `top'.  `in_action' is taken for
 * packets that conform to the profile and `out_action' for those that
 * do not.  Returns NULL when either action fails verification or the
 * block cannot be allocated.
 */
static struct tbmeter *
tbm_create(struct top_cdnr *top, struct tb_profile *profile,
    struct tc_action *in_action, struct tc_action *out_action)
{
	struct tbmeter *meter;

	if (tca_verify_action(in_action) < 0)
		return (NULL);
	if (tca_verify_action(out_action) < 0)
		return (NULL);

	meter = cdnr_cballoc(top, TCETYPE_TBMETER, tbm_input);
	if (meter == NULL)
		return (NULL);

	tb_import_profile(&meter->tb, profile);
	tca_import_action(&meter->in_action, in_action);
	tca_import_action(&meter->out_action, out_action);

	return (meter);
}
/*
 * Destroy a token bucket meter.  Returns EBUSY while it is still
 * referenced, otherwise drops its action references, frees it and
 * returns 0.
 */
static int
tbm_destroy(struct tbmeter *tbm)
{
	if (tbm->cdnrblk.cb_ref <= 0) {
		tca_invalidate_action(&tbm->in_action);
		tca_invalidate_action(&tbm->out_action);
		cdnr_cbdestroy(tbm);
		return (0);
	}
	return (EBUSY);
}
/*
 * Per-packet input function of the token bucket meter.
 *
 * The bucket is refilled only when it does not currently hold enough
 * tokens for the packet.  A conforming packet consumes tokens, is
 * counted in in_cnt and yields in_action; otherwise the packet is
 * counted in out_cnt and out_action is returned.
 */
static struct tc_action *
tbm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
{
	struct tbmeter *meter = (struct tbmeter *)cb;
	u_int64_t need;
	u_int64_t elapsed, stamp;

	need = TB_SCALE(pktinfo->pkt_len);

	if (meter->tb.token < need) {
		stamp = read_machclk();
		elapsed = stamp - meter->tb.last;
		if (elapsed < meter->tb.filluptime) {
			meter->tb.token += elapsed * meter->tb.rate;
			if (meter->tb.token > meter->tb.depth)
				meter->tb.token = meter->tb.depth;
		} else {
			/* idle long enough to fill the bucket completely */
			meter->tb.token = meter->tb.depth;
		}
		meter->tb.last = stamp;
	}

	if (meter->tb.token >= need) {
		meter->tb.token -= need;
		PKTCNTR_ADD(&meter->in_cnt, pktinfo->pkt_len);
		return (&meter->in_action);
	}

	PKTCNTR_ADD(&meter->out_cnt, pktinfo->pkt_len);
	return (&meter->out_action);
}
/*
* two rate three color marker
* as described in draft-heinanen-diffserv-trtcm-01.txt
*/
/*
 * Create a two rate three color marker under `top'.
 *
 * The committed and peak buckets are loaded from their profiles and the
 * three actions are imported.  For each color the dscp used for
 * pre-coloring is the action's own (masked) dscp when the action is
 * MARK, and DSCP_AF11 / DSCP_AF12 / DSCP_AF13 for green / yellow / red
 * otherwise.  Returns NULL when an action fails verification or the
 * block cannot be allocated.
 */
static struct trtcm *
trtcm_create(struct top_cdnr *top, struct tb_profile *cmtd_profile,
    struct tb_profile *peak_profile, struct tc_action *green_action,
    struct tc_action *yellow_action, struct tc_action *red_action,
    int coloraware)
{
	struct trtcm *marker;

	if (tca_verify_action(green_action) < 0)
		return (NULL);
	if (tca_verify_action(yellow_action) < 0)
		return (NULL);
	if (tca_verify_action(red_action) < 0)
		return (NULL);

	marker = cdnr_cballoc(top, TCETYPE_TRTCM, trtcm_input);
	if (marker == NULL)
		return (NULL);

	tb_import_profile(&marker->cmtd_tb, cmtd_profile);
	tb_import_profile(&marker->peak_tb, peak_profile);

	tca_import_action(&marker->green_action, green_action);
	tca_import_action(&marker->yellow_action, yellow_action);
	tca_import_action(&marker->red_action, red_action);

	/* set dscps to use */
	marker->green_dscp =
	    (marker->green_action.tca_code == TCACODE_MARK) ?
	    (marker->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
	marker->yellow_dscp =
	    (marker->yellow_action.tca_code == TCACODE_MARK) ?
	    (marker->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
	marker->red_dscp =
	    (marker->red_action.tca_code == TCACODE_MARK) ?
	    (marker->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;

	marker->coloraware = coloraware;

	return (marker);
}
/*
 * Destroy a two rate three color marker.  Returns EBUSY while it is
 * still referenced, otherwise drops its three action references, frees
 * it and returns 0.
 */
static int
trtcm_destroy(struct trtcm *tcm)
{
	if (tcm->cdnrblk.cb_ref <= 0) {
		tca_invalidate_action(&tcm->green_action);
		tca_invalidate_action(&tcm->yellow_action);
		tca_invalidate_action(&tcm->red_action);
		cdnr_cbdestroy(tcm);
		return (0);
	}
	return (EBUSY);
}
/*
 * Refill one token bucket of a trtcm if it currently holds fewer than
 * `need' tokens, using `stamp' as the current machine clock value.
 */
static void
trtcm_tb_refill(struct tbe *tb, u_int64_t need, u_int64_t stamp)
{
	u_int64_t elapsed;

	if (!(tb->token < need))
		return;

	elapsed = stamp - tb->last;
	if (elapsed < tb->filluptime) {
		tb->token += elapsed * tb->rate;
		if (tb->token > tb->depth)
			tb->token = tb->depth;
	} else {
		tb->token = tb->depth;
	}
	tb->last = stamp;
}

/*
 * Per-packet input function of the two rate three color marker.
 *
 * The packet is pre-colored (from its dscp when color-aware, green
 * otherwise), both buckets are refilled as needed, and the packet is
 * then marked red, yellow or green.  The chosen dscp is written back to
 * pktinfo, the matching counter is updated, tokens are consumed for
 * yellow (peak) and green (committed and peak), and the corresponding
 * action is returned.
 */
static struct tc_action *
trtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
{
	struct trtcm *marker = (struct trtcm *)cb;
	u_int64_t need;
	u_int64_t stamp;
	u_int8_t precolor;

	need = TB_SCALE(pktinfo->pkt_len);

	/* if color-blind, precolor it as green */
	precolor = marker->green_dscp;
	if (marker->coloraware) {
		u_int8_t seen = pktinfo->pkt_dscp;

		if (seen == marker->yellow_dscp || seen == marker->red_dscp)
			precolor = seen;
	}

	stamp = read_machclk();
	trtcm_tb_refill(&marker->cmtd_tb, need, stamp);
	trtcm_tb_refill(&marker->peak_tb, need, stamp);

	if (precolor == marker->red_dscp || marker->peak_tb.token < need) {
		pktinfo->pkt_dscp = marker->red_dscp;
		PKTCNTR_ADD(&marker->red_cnt, pktinfo->pkt_len);
		return (&marker->red_action);
	}

	if (precolor == marker->yellow_dscp || marker->cmtd_tb.token < need) {
		pktinfo->pkt_dscp = marker->yellow_dscp;
		marker->peak_tb.token -= need;
		PKTCNTR_ADD(&marker->yellow_cnt, pktinfo->pkt_len);
		return (&marker->yellow_action);
	}

	pktinfo->pkt_dscp = marker->green_dscp;
	marker->cmtd_tb.token -= need;
	marker->peak_tb.token -= need;
	PKTCNTR_ADD(&marker->green_cnt, pktinfo->pkt_len);
	return (&marker->green_action);
}
/*
* time sliding window three color marker
* as described in draft-fang-diffserv-tc-tswtcm-00.txt
*/
/*
 * Create a time sliding window three color marker under `top'.
 *
 * cmtd_rate and peak_rate are given in bits per second and stored in
 * bytes per second; avg_interval is given in milliseconds and stored in
 * machine clock units.  The dscp for each color is the action's own
 * (masked) dscp when the action is MARK, and DSCP_AF11 / DSCP_AF12 /
 * DSCP_AF13 for green / yellow / red otherwise.  Returns NULL when an
 * action fails verification or the block cannot be allocated.
 */
static struct tswtcm *
tswtcm_create(struct top_cdnr *top, u_int32_t cmtd_rate, u_int32_t peak_rate,
    u_int32_t avg_interval, struct tc_action *green_action,
    struct tc_action *yellow_action, struct tc_action *red_action)
{
	struct tswtcm *marker;

	if (tca_verify_action(green_action) < 0)
		return (NULL);
	if (tca_verify_action(yellow_action) < 0)
		return (NULL);
	if (tca_verify_action(red_action) < 0)
		return (NULL);

	marker = cdnr_cballoc(top, TCETYPE_TSWTCM, tswtcm_input);
	if (marker == NULL)
		return (NULL);

	tca_import_action(&marker->green_action, green_action);
	tca_import_action(&marker->yellow_action, yellow_action);
	tca_import_action(&marker->red_action, red_action);

	/* set dscps to use */
	marker->green_dscp =
	    (marker->green_action.tca_code == TCACODE_MARK) ?
	    (marker->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
	marker->yellow_dscp =
	    (marker->yellow_action.tca_code == TCACODE_MARK) ?
	    (marker->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
	marker->red_dscp =
	    (marker->red_action.tca_code == TCACODE_MARK) ?
	    (marker->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;

	/* convert rates from bits/sec to bytes/sec */
	marker->cmtd_rate = cmtd_rate / 8;
	marker->peak_rate = peak_rate / 8;
	marker->avg_rate = 0;

	/* timewin is converted from msec to machine clock unit */
	marker->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;

	return (marker);
}
/*
 * Destroy a time sliding window three color marker.  Returns EBUSY while
 * it is still referenced, otherwise drops its three action references,
 * frees it and returns 0.
 */
static int
tswtcm_destroy(struct tswtcm *tsw)
{
	if (tsw->cdnrblk.cb_ref <= 0) {
		tca_invalidate_action(&tsw->green_action);
		tca_invalidate_action(&tsw->yellow_action);
		tca_invalidate_action(&tsw->red_action);
		cdnr_cbdestroy(tsw);
		return (0);
	}
	return (EBUSY);
}
/*
* Per-packet input function of the time sliding window three color
* marker.  Updates the running average rate with this packet, then marks
* the packet red, yellow or green, choosing probabilistically when the
* average exceeds the committed rate.  The chosen dscp is written to
* pktinfo, the matching counter is updated and the corresponding action
* is returned.
*/
static struct tc_action *
tswtcm_input(struct cdnr_block *cb, struct cdnr_pktinfo *pktinfo)
{
struct tswtcm *tsw = (struct tswtcm *)cb;
int len;
u_int32_t avg_rate;
u_int64_t interval, now, tmp;
/*
* rate estimator
*/
len = pktinfo->pkt_len;
now = read_machclk();
interval = now - tsw->t_front;
/*
* calculate average rate:
* avg = (avg * timewin + pkt_len)/(timewin + interval)
* pkt_len needs to be multiplied by machclk_freq in order to
* get (bytes/sec).
* note: when avg_rate (bytes/sec) and timewin (machclk unit) are
* less than 32 bits, the following 64-bit operation has enough
* precision.
*/
/*
* NOTE(review): the divisor is zero if timewin is 0 (avg_interval of 0
* is not rejected where timewin is computed) and two packets see the
* same clock value -- confirm whether that can occur.
*/
tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
+ (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
tsw->avg_rate = avg_rate = (u_int32_t)tmp;
tsw->t_front = now;
/*
* marker
*/
if (avg_rate > tsw->cmtd_rate) {
/* avg_rate > cmtd_rate >= 0 here, so the modulus is non-zero */
u_int32_t randval = cprng_fast32() % avg_rate;
if (avg_rate > tsw->peak_rate) {
if (randval < avg_rate - tsw->peak_rate) {
/* mark red */
pktinfo->pkt_dscp = tsw->red_dscp;
PKTCNTR_ADD(&tsw->red_cnt, len);
return (&tsw->red_action);
} else if (randval < avg_rate - tsw->cmtd_rate)
goto mark_yellow;
} else {
/* peak_rate >= avg_rate > cmtd_rate */
if (randval < avg_rate - tsw->cmtd_rate) {
/* shared yellow path; also reached by the goto above */
mark_yellow:
pktinfo->pkt_dscp = tsw->yellow_dscp;
PKTCNTR_ADD(&tsw->yellow_cnt, len);
return (&tsw->yellow_action);
}
}
}
/* mark green */
pktinfo->pkt_dscp = tsw->green_dscp;
PKTCNTR_ADD(&tsw->green_cnt, len);
return (&tsw->green_action);
}
/*
* ioctl requests
*/
/*
 * CDNR_IF_ATTACH: create a top level conditioner for the named
 * interface.  Returns EBADF for an unknown interface, EBUSY when the
 * interface already has a conditioner and ENOMEM when creation fails.
 */
static int
cdnrcmd_if_attach(char *ifname)
{
	struct ifnet *ifp;

	ifp = ifunit(ifname);
	if (ifp == NULL)
		return (EBADF);

	if (ifp->if_snd.altq_cdnr != NULL)
		return (EBUSY);

	if (top_create(&ifp->if_snd) == NULL)
		return (ENOMEM);

	return (0);
}
/*
 * CDNR_IF_DETACH: destroy the top level conditioner of the named
 * interface.  Returns EBADF when the interface has no conditioner.
 */
static int
cdnrcmd_if_detach(char *ifname)
{
	struct top_cdnr *victim = tcb_lookup(ifname);

	if (victim == NULL)
		return (EBADF);

	return (top_destroy(victim));
}
/*
 * CDNR_ADD_ELEM: create a simple element on the named interface and
 * return its handle in ap->cdnr_handle.  Returns EBADF when the
 * interface has no conditioner and EINVAL when the element cannot be
 * created.
 */
static int
cdnrcmd_add_element(struct cdnr_add_element *ap)
{
	struct top_cdnr *owner;
	struct cdnr_block *elem;

	owner = tcb_lookup(ap->iface.cdnr_ifname);
	if (owner == NULL)
		return (EBADF);

	elem = element_create(owner, &ap->action);
	if (elem == NULL)
		return (EINVAL);

	/* return a class handle to the user */
	ap->cdnr_handle = cdnr_cb2handle(elem);
	return (0);
}
/*
 * CDNR_DEL_ELEM: destroy the block identified by ap->cdnr_handle.
 * Returns EBADF when the named interface has no conditioner, EINVAL for
 * an unresolvable handle, otherwise the result of the destroy routine.
 */
static int
cdnrcmd_delete_element(struct cdnr_delete_element *ap)
{
	struct cdnr_block *victim;

	if (tcb_lookup(ap->iface.cdnr_ifname) == NULL)
		return (EBADF);

	victim = cdnr_handle2cb(ap->cdnr_handle);
	if (victim == NULL)
		return (EINVAL);

	if (victim->cb_type == TCETYPE_ELEMENT)
		return (element_destroy(victim));

	return (generic_element_destroy(victim));
}
static int
cdnrcmd_add_filter(struct cdnr_add_filter *ap)
{
struct top_cdnr *top;
struct cdnr_block *cb;
if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
return (EBADF);
if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
return (EINVAL);
return acc_add_filter(&top->tc_classifier, &ap->filter,
cb, &ap->filter_handle);
}
/*
 * CDNR_DEL_FILTER: remove the filter identified by ap->filter_handle
 * from the classifier of the named interface.  Returns EBADF when the
 * interface has no conditioner.
 */
static int
cdnrcmd_delete_filter(struct cdnr_delete_filter *ap)
{
	struct top_cdnr *owner = tcb_lookup(ap->iface.cdnr_ifname);

	if (owner == NULL)
		return (EBADF);

	return (acc_delete_filter(&owner->tc_classifier, ap->filter_handle));
}
/*
 * CDNR_ADD_TBM: create a token bucket meter on the named interface and
 * return its handle in ap->cdnr_handle.  Returns EBADF when the
 * interface has no conditioner and EINVAL when the meter cannot be
 * created.
 */
static int
cdnrcmd_add_tbm(struct cdnr_add_tbmeter *ap)
{
	struct top_cdnr *owner;
	struct tbmeter *meter;

	owner = tcb_lookup(ap->iface.cdnr_ifname);
	if (owner == NULL)
		return (EBADF);

	meter = tbm_create(owner, &ap->profile, &ap->in_action,
	    &ap->out_action);
	if (meter == NULL)
		return (EINVAL);

	/* return a class handle to the user */
	ap->cdnr_handle = cdnr_cb2handle(&meter->cdnrblk);
	return (0);
}
static int
cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *ap)
{
struct tbmeter *tbm;
if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
return (EINVAL);
tb_import_profile(&tbm->tb, &ap->profile);
return (0);
}
static int
cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *ap)
{
struct tbmeter *tbm;
if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
return (EINVAL);
ap->in_cnt = tbm->in_cnt;
ap->out_cnt = tbm->out_cnt;
return (0);
}
/*
 * CDNR_ADD_TCM: create a two rate three color marker on the named
 * interface and return its handle in ap->cdnr_handle.  Returns EBADF
 * when the interface has no conditioner and EINVAL when the marker
 * cannot be created.
 */
static int
cdnrcmd_add_trtcm(struct cdnr_add_trtcm *ap)
{
	struct top_cdnr *owner;
	struct trtcm *marker;

	owner = tcb_lookup(ap->iface.cdnr_ifname);
	if (owner == NULL)
		return (EBADF);

	marker = trtcm_create(owner, &ap->cmtd_profile, &ap->peak_profile,
	    &ap->green_action, &ap->yellow_action, &ap->red_action,
	    ap->coloraware);
	if (marker == NULL)
		return (EINVAL);

	/* return a class handle to the user */
	ap->cdnr_handle = cdnr_cb2handle(&marker->cdnrblk);
	return (0);
}
static int
cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *ap)
{
struct trtcm *tcm;
if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
return (EINVAL);
tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
return (0);
}
/*
 * CDNR_TCM_STATS: copy the green/yellow/red counters of the three color
 * marker identified by ap->cdnr_handle into the request.  Both trtcm and
 * tswtcm blocks are accepted; any other block type, or an unresolvable
 * handle, yields EINVAL.
 */
static int
cdnrcmd_tcm_stats(struct cdnr_tcm_stats *ap)
{
	struct cdnr_block *blk;
	struct trtcm *tr;
	struct tswtcm *tsw;

	blk = cdnr_handle2cb(ap->cdnr_handle);
	if (blk == NULL)
		return (EINVAL);

	switch (blk->cb_type) {
	case TCETYPE_TRTCM:
		tr = (struct trtcm *)blk;
		ap->green_cnt = tr->green_cnt;
		ap->yellow_cnt = tr->yellow_cnt;
		ap->red_cnt = tr->red_cnt;
		break;

	case TCETYPE_TSWTCM:
		tsw = (struct tswtcm *)blk;
		ap->green_cnt = tsw->green_cnt;
		ap->yellow_cnt = tsw->yellow_cnt;
		ap->red_cnt = tsw->red_cnt;
		break;

	default:
		return (EINVAL);
	}

	return (0);
}
/*
 * CDNR_ADD_TSW: create a time sliding window three color marker on the
 * named interface and return its handle in ap->cdnr_handle.  Returns
 * EBADF when the interface has no conditioner and EINVAL when the
 * committed rate exceeds the peak rate or the marker cannot be created.
 */
static int
cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *ap)
{
	struct top_cdnr *owner;
	struct tswtcm *marker;

	owner = tcb_lookup(ap->iface.cdnr_ifname);
	if (owner == NULL)
		return (EBADF);

	if (ap->cmtd_rate > ap->peak_rate)
		return (EINVAL);

	marker = tswtcm_create(owner, ap->cmtd_rate, ap->peak_rate,
	    ap->avg_interval, &ap->green_action, &ap->yellow_action,
	    &ap->red_action);
	if (marker == NULL)
		return (EINVAL);

	/* return a class handle to the user */
	ap->cdnr_handle = cdnr_cb2handle(&marker->cdnrblk);
	return (0);
}
static int
cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *ap)
{
struct tswtcm *tsw;
if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
return (EINVAL);
if (ap->cmtd_rate > ap->peak_rate)
return (EINVAL);
/* convert rates from bits/sec to bytes/sec */
tsw->cmtd_rate = ap->cmtd_rate / 8;
tsw->peak_rate = ap->peak_rate / 8;
tsw->avg_rate = 0;
/* timewin is converted from msec to machine clock unit */
tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
return (0);
}
static int
cdnrcmd_get_stats(struct cdnr_get_stats *ap)
{
struct top_cdnr *top;
struct cdnr_block *cb;
struct tbmeter *tbm;
struct trtcm *tcm;
struct tswtcm *tsw;
struct tce_stats tce, *usp;
int error, n, nskip, nelements;
if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
return (EBADF);
/* copy action stats */
(void)memcpy(ap->cnts, top->tc_cnts, sizeof(ap->cnts));
/* stats for each element */
nelements = ap->nelements;
usp = ap->tce_stats;
if (nelements <= 0 || usp == NULL)
return (0);
nskip = ap->nskip;
n = 0;
LIST_FOREACH(cb, &top->tc_elements, cb_next) {
if (nskip > 0) {
nskip--;
continue;
}
(void)memset(&tce, 0, sizeof(tce));
tce.tce_handle = cb->cb_handle;
tce.tce_type = cb->cb_type;
switch (cb->cb_type) {
case TCETYPE_TBMETER:
tbm = (struct tbmeter *)cb;
tce.tce_cnts[0] = tbm->in_cnt;
tce.tce_cnts[1] = tbm->out_cnt;
break;
case TCETYPE_TRTCM:
tcm = (struct trtcm *)cb;
tce.tce_cnts[0] = tcm->green_cnt;
tce.tce_cnts[1] = tcm->yellow_cnt;
tce.tce_cnts[2] = tcm->red_cnt;
break;
case TCETYPE_TSWTCM:
tsw = (struct tswtcm *)cb;
tce.tce_cnts[0] = tsw->green_cnt;
tce.tce_cnts[1] = tsw->yellow_cnt;
tce.tce_cnts[2] = tsw->red_cnt;
break;
default:
continue;
}
if ((error = copyout((void *)&tce, (void *)usp++,
sizeof(tce))) != 0)
return (error);
if (++n == nelements)
break;
}
ap->nelements = n;
return (0);
}
/*
* conditioner device interface
*/
/*
 * Device open: make sure the machine clock has been initialised.
 * Returns ENXIO when no clock frequency is available, 0 otherwise.
 */
int
cdnropen(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	if (machclk_freq == 0) {
		init_machclk();
		if (machclk_freq == 0) {
			printf("cdnr: no CPU clock available!\n");
			return (ENXIO);
		}
	}

	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
/*
 * Device close: destroy every top level conditioner and clear the
 * altq_input hook.  Returns the first non-zero error reported by
 * top_destroy(), or 0.
 */
int
cdnrclose(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	struct top_cdnr *victim;
	int rc, first_error = 0;

	/* destroy all */
	for (;;) {
		victim = LIST_FIRST(&tcb_list);
		if (victim == NULL)
			break;

		rc = top_destroy(victim);
		if (first_error == 0 && rc != 0)
			first_error = rc;
	}

	altq_input = NULL;

	return (first_error);
}
/*
* Device ioctl entry point.
*
* Every command except CDNR_GETSTATS first passes a privilege check.
* The command is then dispatched to its cdnrcmd_*() handler with the
* priority level raised by splnet(); CDNR_ENABLE and CDNR_DISABLE are
* handled inline.  Returns the handler's error code, EBADF when
* enable/disable names an interface without a conditioner, or EINVAL
* for an unknown command.
*/
int
cdnrioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
struct lwp *l)
{
struct top_cdnr *top;
struct cdnr_interface *ifacep;
int s, error = 0;
/* check super-user privilege */
switch (cmd) {
case CDNR_GETSTATS:
/* reading statistics needs no privilege */
break;
default:
#if (__FreeBSD_version > 400000)
if ((error = suser(p)) != 0)
#else
if ((error = kauth_authorize_network(l->l_cred,
KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_CDNR, NULL,
NULL, NULL)) != 0)
#endif
return (error);
break;
}
s = splnet();
switch (cmd) {
case CDNR_IF_ATTACH:
ifacep = (struct cdnr_interface *)addr;
error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
break;
case CDNR_IF_DETACH:
ifacep = (struct cdnr_interface *)addr;
error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
break;
case CDNR_ENABLE:
case CDNR_DISABLE:
ifacep = (struct cdnr_interface *)addr;
if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
error = EBADF;
break;
}
switch (cmd) {
case CDNR_ENABLE:
ALTQ_SET_CNDTNING(top->tc_ifq);
/* install the input hook when the first conditioner is enabled */
if (altq_input == NULL)
altq_input = altq_cdnr_input;
break;
case CDNR_DISABLE:
ALTQ_CLEAR_CNDTNING(top->tc_ifq);
/* `top' is reused as a cursor: look for any conditioner still enabled */
LIST_FOREACH(top, &tcb_list, tc_next)
if (ALTQ_IS_CNDTNING(top->tc_ifq))
break;
/* none left enabled: remove the input hook */
if (top == NULL)
altq_input = NULL;
break;
}
break;
case CDNR_ADD_ELEM:
error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
break;
case CDNR_DEL_ELEM:
error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
break;
case CDNR_ADD_TBM:
error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
break;
case CDNR_MOD_TBM:
error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
break;
case CDNR_TBM_STATS:
error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
break;
case CDNR_ADD_TCM:
error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
break;
case CDNR_MOD_TCM:
error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
break;
case CDNR_TCM_STATS:
error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
break;
case CDNR_ADD_FILTER:
error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
break;
case CDNR_DEL_FILTER:
error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
break;
case CDNR_GETSTATS:
error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
break;
case CDNR_ADD_TSW:
error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
break;
case CDNR_MOD_TSW:
error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
break;
default:
error = EINVAL;
break;
}
splx(s);
return error;
}
#ifdef KLD_MODULE
/* module glue: device switch entry naming the open/close/ioctl handlers above */
static struct altqsw cdnr_sw =
{"cdnr", cdnropen, cdnrclose, cdnrioctl};
ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
#endif /* KLD_MODULE */
#endif /* ALTQ_CDNR */
#endif /* ALTQ3_COMPAT */