NetBSD/sys/net/npf/npf_tableset.c
christos f75d79eb69 Sync NPF with the version on github: backport standalone NPF changes,
which allow us to create and run separate NPF instances. Minor fixes.
(from rmind@)
2016-12-26 23:05:05 +00:00

753 lines
16 KiB
C

/* $NetBSD: npf_tableset.c,v 1.25 2016/12/26 23:05:06 christos Exp $ */
/*-
* Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This material is based upon work partially supported by The
* NetBSD Foundation under a contract with Mindaugas Rasiukevicius.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* NPF tableset module.
*
* Notes
*
* The tableset is an array of tables. After the creation, the array
* is immutable. The caller is responsible to synchronise the access
* to the tableset. The table can either be a hash or a tree. Its
* entries are protected by a read-write lock.
*/
#ifdef _KERNEL
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.25 2016/12/26 23:05:06 christos Exp $");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/hash.h>
#include <sys/cdbr.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/types.h>
#include "lpm.h"
#endif
#include "npf_impl.h"
typedef struct npf_tblent {
LIST_ENTRY(npf_tblent) te_listent;
uint16_t te_preflen;
uint16_t te_alen;
npf_addr_t te_addr;
} npf_tblent_t;
LIST_HEAD(npf_hashl, npf_tblent);
struct npf_table {
/*
* The storage type can be: a) hash b) tree c) cdb.
* There are separate trees for IPv4 and IPv6.
*/
union {
struct {
struct npf_hashl *t_hashl;
u_long t_hashmask;
};
struct {
lpm_t * t_lpm;
LIST_HEAD(, npf_tblent) t_list;
};
struct {
void * t_blob;
size_t t_bsize;
struct cdbr * t_cdb;
};
} /* C11 */;
/*
* Table ID, type and lock. The ID may change during the
* config reload, it is protected by the npf_config_lock.
*/
int t_type;
u_int t_id;
krwlock_t t_lock;
/* The number of items, reference count and table name. */
u_int t_nitems;
u_int t_refcnt;
char t_name[NPF_TABLE_MAXNAMELEN];
};
struct npf_tableset {
u_int ts_nitems;
npf_table_t * ts_map[];
};
#define NPF_TABLESET_SIZE(n) \
(offsetof(npf_tableset_t, ts_map[n]) * sizeof(npf_table_t *))
#define NPF_ADDRLEN2TREE(alen) ((alen) >> 4)
static pool_cache_t tblent_cache __read_mostly;
/*
* npf_table_sysinit: initialise tableset structures.
*/
void
npf_tableset_sysinit(void)
{
tblent_cache = pool_cache_init(sizeof(npf_tblent_t), coherency_unit,
0, 0, "npftblpl", NULL, IPL_NONE, NULL, NULL, NULL);
}
void
npf_tableset_sysfini(void)
{
pool_cache_destroy(tblent_cache);
}
npf_tableset_t *
npf_tableset_create(u_int nitems)
{
npf_tableset_t *ts = kmem_zalloc(NPF_TABLESET_SIZE(nitems), KM_SLEEP);
ts->ts_nitems = nitems;
return ts;
}
void
npf_tableset_destroy(npf_tableset_t *ts)
{
/*
* Destroy all tables (no references should be held, since the
* ruleset should be destroyed before).
*/
for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
npf_table_t *t = ts->ts_map[tid];
if (t && atomic_dec_uint_nv(&t->t_refcnt) == 0) {
npf_table_destroy(t);
}
}
kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems));
}
/*
* npf_tableset_insert: insert the table into the specified tableset.
*
* => Returns 0 on success. Fails and returns error if ID is already used.
*/
int
npf_tableset_insert(npf_tableset_t *ts, npf_table_t *t)
{
const u_int tid = t->t_id;
int error;
KASSERT((u_int)tid < ts->ts_nitems);
if (ts->ts_map[tid] == NULL) {
atomic_inc_uint(&t->t_refcnt);
ts->ts_map[tid] = t;
error = 0;
} else {
error = EEXIST;
}
return error;
}
/*
* npf_tableset_getbyname: look for a table in the set given the name.
*/
npf_table_t *
npf_tableset_getbyname(npf_tableset_t *ts, const char *name)
{
npf_table_t *t;
for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
if ((t = ts->ts_map[tid]) == NULL)
continue;
if (strcmp(name, t->t_name) == 0)
return t;
}
return NULL;
}
npf_table_t *
npf_tableset_getbyid(npf_tableset_t *ts, u_int tid)
{
if (__predict_true(tid < ts->ts_nitems)) {
return ts->ts_map[tid];
}
return NULL;
}
/*
* npf_tableset_reload: iterate all tables and if the new table is of the
* same type and has no items, then we preserve the old one and its entries.
*
* => The caller is responsible for providing synchronisation.
*/
void
npf_tableset_reload(npf_t *npf, npf_tableset_t *nts, npf_tableset_t *ots)
{
for (u_int tid = 0; tid < nts->ts_nitems; tid++) {
npf_table_t *t, *ot;
if ((t = nts->ts_map[tid]) == NULL) {
continue;
}
/* If our table has entries, just load it. */
if (t->t_nitems) {
continue;
}
/* Look for a currently existing table with such name. */
ot = npf_tableset_getbyname(ots, t->t_name);
if (ot == NULL) {
/* Not found: we have a new table. */
continue;
}
/* Found. Did the type change? */
if (t->t_type != ot->t_type) {
/* Yes, load the new. */
continue;
}
/*
* Preserve the current table. Acquire a reference since
* we are keeping it in the old table set. Update its ID.
*/
atomic_inc_uint(&ot->t_refcnt);
nts->ts_map[tid] = ot;
KASSERT(npf_config_locked_p(npf));
ot->t_id = tid;
/* Destroy the new table (we hold the only reference). */
t->t_refcnt--;
npf_table_destroy(t);
}
}
int
npf_tableset_export(npf_t *npf, const npf_tableset_t *ts, prop_array_t tables)
{
const npf_table_t *t;
KASSERT(npf_config_locked_p(npf));
for (u_int tid = 0; tid < ts->ts_nitems; tid++) {
if ((t = ts->ts_map[tid]) == NULL) {
continue;
}
prop_dictionary_t tdict = prop_dictionary_create();
prop_dictionary_set_cstring(tdict, "name", t->t_name);
prop_dictionary_set_uint32(tdict, "type", t->t_type);
prop_dictionary_set_uint32(tdict, "id", tid);
prop_array_add(tables, tdict);
prop_object_release(tdict);
}
return 0;
}
/*
* Few helper routines.
*/
static npf_tblent_t *
table_hash_lookup(const npf_table_t *t, const npf_addr_t *addr,
const int alen, struct npf_hashl **rhtbl)
{
const uint32_t hidx = hash32_buf(addr, alen, HASH32_BUF_INIT);
struct npf_hashl *htbl = &t->t_hashl[hidx & t->t_hashmask];
npf_tblent_t *ent;
/*
* Lookup the hash table and check for duplicates.
* Note: mask is ignored for the hash storage.
*/
LIST_FOREACH(ent, htbl, te_listent) {
if (ent->te_alen != alen) {
continue;
}
if (memcmp(&ent->te_addr, addr, alen) == 0) {
break;
}
}
*rhtbl = htbl;
return ent;
}
static void
table_hash_flush(npf_table_t *t)
{
for (unsigned n = 0; n <= t->t_hashmask; n++) {
npf_tblent_t *ent;
while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
LIST_REMOVE(ent, te_listent);
pool_cache_put(tblent_cache, ent);
}
}
}
static void
table_tree_flush(npf_table_t *t)
{
npf_tblent_t *ent;
while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
LIST_REMOVE(ent, te_listent);
pool_cache_put(tblent_cache, ent);
}
lpm_clear(t->t_lpm, NULL, NULL);
}
/*
* npf_table_create: create table with a specified ID.
*/
npf_table_t *
npf_table_create(const char *name, u_int tid, int type,
void *blob, size_t size)
{
npf_table_t *t;
t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
switch (type) {
case NPF_TABLE_TREE:
if ((t->t_lpm = lpm_create()) == NULL) {
goto out;
}
LIST_INIT(&t->t_list);
break;
case NPF_TABLE_HASH:
t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
if (t->t_hashl == NULL) {
goto out;
}
break;
case NPF_TABLE_CDB:
t->t_blob = blob;
t->t_bsize = size;
t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
if (t->t_cdb == NULL) {
free(blob, M_TEMP);
goto out;
}
t->t_nitems = cdbr_entries(t->t_cdb);
break;
default:
KASSERT(false);
}
rw_init(&t->t_lock);
t->t_type = type;
t->t_id = tid;
return t;
out:
kmem_free(t, sizeof(npf_table_t));
return NULL;
}
/*
* npf_table_destroy: free all table entries and table itself.
*/
void
npf_table_destroy(npf_table_t *t)
{
KASSERT(t->t_refcnt == 0);
switch (t->t_type) {
case NPF_TABLE_HASH:
table_hash_flush(t);
hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
break;
case NPF_TABLE_TREE:
table_tree_flush(t);
lpm_destroy(t->t_lpm);
break;
case NPF_TABLE_CDB:
cdbr_close(t->t_cdb);
free(t->t_blob, M_TEMP);
break;
default:
KASSERT(false);
}
rw_destroy(&t->t_lock);
kmem_free(t, sizeof(npf_table_t));
}
/*
* npf_table_check: validate the name, ID and type.
*/
int
npf_table_check(npf_tableset_t *ts, const char *name, u_int tid, int type)
{
if ((u_int)tid >= ts->ts_nitems) {
return EINVAL;
}
if (ts->ts_map[tid] != NULL) {
return EEXIST;
}
switch (type) {
case NPF_TABLE_TREE:
case NPF_TABLE_HASH:
case NPF_TABLE_CDB:
break;
default:
return EINVAL;
}
if (strlen(name) >= NPF_TABLE_MAXNAMELEN) {
return ENAMETOOLONG;
}
if (npf_tableset_getbyname(ts, name)) {
return EEXIST;
}
return 0;
}
static int
table_cidr_check(const u_int aidx, const npf_addr_t *addr,
const npf_netmask_t mask)
{
if (aidx > 1) {
return EINVAL;
}
if (mask > NPF_MAX_NETMASK && mask != NPF_NO_NETMASK) {
return EINVAL;
}
/*
* For IPv4 (aidx = 0) - 32 and for IPv6 (aidx = 1) - 128.
* If it is a host - shall use NPF_NO_NETMASK.
*/
if (mask > (aidx ? 128 : 32) && mask != NPF_NO_NETMASK) {
return EINVAL;
}
return 0;
}
/*
* npf_table_insert: add an IP CIDR entry into the table.
*/
int
npf_table_insert(npf_table_t *t, const int alen,
const npf_addr_t *addr, const npf_netmask_t mask)
{
const u_int aidx = NPF_ADDRLEN2TREE(alen);
npf_tblent_t *ent;
int error;
error = table_cidr_check(aidx, addr, mask);
if (error) {
return error;
}
ent = pool_cache_get(tblent_cache, PR_WAITOK);
memcpy(&ent->te_addr, addr, alen);
ent->te_alen = alen;
/*
* Insert the entry. Return an error on duplicate.
*/
rw_enter(&t->t_lock, RW_WRITER);
switch (t->t_type) {
case NPF_TABLE_HASH: {
struct npf_hashl *htbl;
/*
* Hash tables by the concept support only IPs.
*/
if (mask != NPF_NO_NETMASK) {
error = EINVAL;
break;
}
if (!table_hash_lookup(t, addr, alen, &htbl)) {
LIST_INSERT_HEAD(htbl, ent, te_listent);
t->t_nitems++;
} else {
error = EEXIST;
}
break;
}
case NPF_TABLE_TREE: {
const unsigned preflen =
(mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
ent->te_preflen = preflen;
t->t_nitems++;
error = 0;
} else {
error = EEXIST;
}
break;
}
case NPF_TABLE_CDB:
error = EINVAL;
break;
default:
KASSERT(false);
}
rw_exit(&t->t_lock);
if (error) {
pool_cache_put(tblent_cache, ent);
}
return error;
}
/*
* npf_table_remove: remove the IP CIDR entry from the table.
*/
int
npf_table_remove(npf_table_t *t, const int alen,
const npf_addr_t *addr, const npf_netmask_t mask)
{
const u_int aidx = NPF_ADDRLEN2TREE(alen);
npf_tblent_t *ent = NULL;
int error = ENOENT;
error = table_cidr_check(aidx, addr, mask);
if (error) {
return error;
}
rw_enter(&t->t_lock, RW_WRITER);
switch (t->t_type) {
case NPF_TABLE_HASH: {
struct npf_hashl *htbl;
ent = table_hash_lookup(t, addr, alen, &htbl);
if (__predict_true(ent != NULL)) {
LIST_REMOVE(ent, te_listent);
t->t_nitems--;
}
break;
}
case NPF_TABLE_TREE: {
ent = lpm_lookup(t->t_lpm, addr, alen);
if (__predict_true(ent != NULL)) {
LIST_REMOVE(ent, te_listent);
lpm_remove(t->t_lpm, &ent->te_addr,
ent->te_alen, ent->te_preflen);
t->t_nitems--;
}
break;
}
case NPF_TABLE_CDB:
error = EINVAL;
break;
default:
KASSERT(false);
ent = NULL;
}
rw_exit(&t->t_lock);
if (ent) {
pool_cache_put(tblent_cache, ent);
}
return error;
}
/*
* npf_table_lookup: find the table according to ID, lookup and match
* the contents with the specified IP address.
*/
int
npf_table_lookup(npf_table_t *t, const int alen, const npf_addr_t *addr)
{
const u_int aidx = NPF_ADDRLEN2TREE(alen);
struct npf_hashl *htbl;
const void *data;
size_t dlen;
bool found;
if (__predict_false(aidx > 1)) {
return EINVAL;
}
switch (t->t_type) {
case NPF_TABLE_HASH:
rw_enter(&t->t_lock, RW_READER);
found = table_hash_lookup(t, addr, alen, &htbl) != NULL;
rw_exit(&t->t_lock);
break;
case NPF_TABLE_TREE:
rw_enter(&t->t_lock, RW_READER);
found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
rw_exit(&t->t_lock);
break;
case NPF_TABLE_CDB:
if (cdbr_find(t->t_cdb, addr, alen, &data, &dlen) == 0) {
found = dlen == (u_int)alen &&
memcmp(addr, data, dlen) == 0;
} else {
found = false;
}
break;
default:
KASSERT(false);
found = false;
}
return found ? 0 : ENOENT;
}
static int
table_ent_copyout(const npf_addr_t *addr, const int alen, npf_netmask_t mask,
void *ubuf, size_t len, size_t *off)
{
void *ubufp = (uint8_t *)ubuf + *off;
npf_ioctl_ent_t uent;
if ((*off += sizeof(npf_ioctl_ent_t)) > len) {
return ENOMEM;
}
uent.alen = alen;
memcpy(&uent.addr, addr, sizeof(npf_addr_t));
uent.mask = mask;
return copyout(&uent, ubufp, sizeof(npf_ioctl_ent_t));
}
static int
table_hash_list(const npf_table_t *t, void *ubuf, size_t len)
{
size_t off = 0;
int error = 0;
for (unsigned n = 0; n <= t->t_hashmask; n++) {
npf_tblent_t *ent;
LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
error = table_ent_copyout(&ent->te_addr,
ent->te_alen, 0, ubuf, len, &off);
if (error)
break;
}
}
return error;
}
static int
table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
{
npf_tblent_t *ent;
size_t off = 0;
int error = 0;
LIST_FOREACH(ent, &t->t_list, te_listent) {
error = table_ent_copyout(&ent->te_addr,
ent->te_alen, 0, ubuf, len, &off);
if (error)
break;
}
return error;
}
static int
table_cdb_list(npf_table_t *t, void *ubuf, size_t len)
{
size_t off = 0, dlen;
const void *data;
int error = 0;
for (size_t i = 0; i < t->t_nitems; i++) {
if (cdbr_get(t->t_cdb, i, &data, &dlen) != 0) {
return EINVAL;
}
error = table_ent_copyout(data, dlen, 0, ubuf, len, &off);
if (error)
break;
}
return error;
}
/*
* npf_table_list: copy a list of all table entries into a userspace buffer.
*/
int
npf_table_list(npf_table_t *t, void *ubuf, size_t len)
{
int error = 0;
rw_enter(&t->t_lock, RW_READER);
switch (t->t_type) {
case NPF_TABLE_HASH:
error = table_hash_list(t, ubuf, len);
break;
case NPF_TABLE_TREE:
error = table_tree_list(t, ubuf, len);
break;
case NPF_TABLE_CDB:
error = table_cdb_list(t, ubuf, len);
break;
default:
KASSERT(false);
}
rw_exit(&t->t_lock);
return error;
}
/*
* npf_table_flush: remove all table entries.
*/
int
npf_table_flush(npf_table_t *t)
{
int error = 0;
rw_enter(&t->t_lock, RW_WRITER);
switch (t->t_type) {
case NPF_TABLE_HASH:
table_hash_flush(t);
t->t_nitems = 0;
break;
case NPF_TABLE_TREE:
table_tree_flush(t);
t->t_nitems = 0;
break;
case NPF_TABLE_CDB:
error = EINVAL;
break;
default:
KASSERT(false);
}
rw_exit(&t->t_lock);
return error;
}