From 07861232b4f7870dab467aa7b8fb149bb193896e Mon Sep 17 00:00:00 2001 From: rmind Date: Sat, 23 May 2020 19:56:00 +0000 Subject: [PATCH] Backport selected NPF fixes from the upstream (to be pulled up): - npf_conndb_lookup: protect the connection lookup with pserialize(9), instead of incorrectly assuming that the handler always runs at IPL_SOFTNET. Should fix crashes reported on high load (PR/55182). - npf_config_destroy: handle partially initialized config; fixes crashes with some invalid configurations. - NAT policy creation / destruction: set the initial reference and do not wait for reference draining on destruction; destroy the policy on the last reference drop instead. Fixes a lockup with the dynamic NAT rules. - npf_nat_{export,import}: fix a regression since dynamic NAT rules. - npfctl: fix a regression and restore the default group behaviour. - Add npf_cache_tcp() and validate the TCP data offset (from maxv@). --- sys/net/npf/npf_conf.c | 18 +++-- sys/net/npf/npf_conn.c | 6 +- sys/net/npf/npf_conn.h | 2 +- sys/net/npf/npf_conndb.c | 8 ++- sys/net/npf/npf_inet.c | 22 +++++-- sys/net/npf/npf_nat.c | 113 +++++++++++++++++++++----------- usr.sbin/npf/npfctl/npf_build.c | 48 +++++++++++--- usr.sbin/npf/npfctl/npf_show.c | 4 +- usr.sbin/npf/npfctl/npfctl.h | 1 + 9 files changed, 159 insertions(+), 63 deletions(-) diff --git a/sys/net/npf/npf_conf.c b/sys/net/npf/npf_conf.c index f1de680eb752..3106e6e322c9 100644 --- a/sys/net/npf/npf_conf.c +++ b/sys/net/npf/npf_conf.c @@ -47,7 +47,7 @@ #ifdef _KERNEL #include -__KERNEL_RCSID(0, "$NetBSD: npf_conf.c,v 1.15 2019/08/25 13:21:03 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_conf.c,v 1.16 2020/05/23 19:56:00 rmind Exp $"); #include #include @@ -94,10 +94,18 @@ npf_config_destroy(npf_config_t *nc) * Note: the rulesets must be destroyed first, in order to drop * any references to the tableset. 
*/ - npf_ruleset_destroy(nc->ruleset); - npf_ruleset_destroy(nc->nat_ruleset); - npf_rprocset_destroy(nc->rule_procs); - npf_tableset_destroy(nc->tableset); + if (nc->ruleset) { + npf_ruleset_destroy(nc->ruleset); + } + if (nc->nat_ruleset) { + npf_ruleset_destroy(nc->nat_ruleset); + } + if (nc->rule_procs) { + npf_rprocset_destroy(nc->rule_procs); + } + if (nc->tableset) { + npf_tableset_destroy(nc->tableset); + } kmem_free(nc, sizeof(npf_config_t)); } diff --git a/sys/net/npf/npf_conn.c b/sys/net/npf/npf_conn.c index ec38eacde0f3..ba8feca5273f 100644 --- a/sys/net/npf/npf_conn.c +++ b/sys/net/npf/npf_conn.c @@ -107,7 +107,7 @@ #ifdef _KERNEL #include -__KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.30 2019/09/29 17:00:29 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_conn.c,v 1.31 2020/05/23 19:56:00 rmind Exp $"); #include #include @@ -311,7 +311,7 @@ npf_conn_lookup(const npf_cache_t *npc, const int di, bool *forw) if (!npf_conn_conkey(npc, &key, true)) { return NULL; } - con = npf_conndb_lookup(npf->conn_db, &key, forw); + con = npf_conndb_lookup(npf, &key, forw); if (con == NULL) { return NULL; } @@ -908,7 +908,7 @@ npf_conn_find(npf_t *npf, const nvlist_t *idict, nvlist_t **odict) if (!kdict || !npf_connkey_import(kdict, &key)) { return EINVAL; } - con = npf_conndb_lookup(npf->conn_db, &key, &forw); + con = npf_conndb_lookup(npf, &key, &forw); if (con == NULL) { return ESRCH; } diff --git a/sys/net/npf/npf_conn.h b/sys/net/npf/npf_conn.h index 62cbe279f2cd..efae5a5ffdbd 100644 --- a/sys/net/npf/npf_conn.h +++ b/sys/net/npf/npf_conn.h @@ -157,7 +157,7 @@ void npf_conndb_sysfini(npf_t *); npf_conndb_t * npf_conndb_create(void); void npf_conndb_destroy(npf_conndb_t *); -npf_conn_t * npf_conndb_lookup(npf_conndb_t *, const npf_connkey_t *, bool *); +npf_conn_t * npf_conndb_lookup(npf_t *, const npf_connkey_t *, bool *); bool npf_conndb_insert(npf_conndb_t *, const npf_connkey_t *, npf_conn_t *, bool); npf_conn_t * npf_conndb_remove(npf_conndb_t *, npf_connkey_t *); 
diff --git a/sys/net/npf/npf_conndb.c b/sys/net/npf/npf_conndb.c index 0aec07618c17..504238c6049b 100644 --- a/sys/net/npf/npf_conndb.c +++ b/sys/net/npf/npf_conndb.c @@ -46,7 +46,7 @@ #ifdef _KERNEL #include -__KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.7 2019/12/14 15:21:51 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_conndb.c,v 1.8 2020/05/23 19:56:00 rmind Exp $"); #include #include @@ -143,8 +143,9 @@ npf_conndb_destroy(npf_conndb_t *cd) * npf_conndb_lookup: find a connection given the key. */ npf_conn_t * -npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *ck, bool *forw) +npf_conndb_lookup(npf_t *npf, const npf_connkey_t *ck, bool *forw) { + npf_conndb_t *cd = atomic_load_relaxed(&npf->conn_db); const unsigned keylen = NPF_CONNKEY_LEN(ck); npf_conn_t *con; void *val; @@ -152,8 +153,10 @@ npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *ck, bool *forw) /* * Lookup the connection key in the key-value map. */ + int s = npf_config_read_enter(npf); val = thmap_get(cd->cd_map, ck->ck_key, keylen); if (!val) { + npf_config_read_exit(npf, s); return NULL; } @@ -169,6 +172,7 @@ npf_conndb_lookup(npf_conndb_t *cd, const npf_connkey_t *ck, bool *forw) * Acquire a reference and return the connection. 
*/ atomic_inc_uint(&con->c_refcnt); + npf_config_read_exit(npf, s); return con; } diff --git a/sys/net/npf/npf_inet.c b/sys/net/npf/npf_inet.c index fa1d2d87b76d..a3934edcd4d0 100644 --- a/sys/net/npf/npf_inet.c +++ b/sys/net/npf/npf_inet.c @@ -38,7 +38,7 @@ #ifdef _KERNEL #include -__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.55 2019/08/11 20:26:34 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_inet.c,v 1.56 2020/05/23 19:56:00 rmind Exp $"); #include #include @@ -562,6 +562,22 @@ npf_cache_ip(npf_cache_t *npc, nbuf_t *nbuf) return flags; } +static inline int +npf_cache_tcp(npf_cache_t *npc, nbuf_t *nbuf, unsigned hlen) +{ + struct tcphdr *th; + + th = nbuf_advance(nbuf, hlen, sizeof(struct tcphdr)); + if (__predict_false(th == NULL)) { + return NPC_FMTERR; + } + if (__predict_false(th->th_off < 5)) { + return NPC_FMTERR; + } + npc->npc_l4.tcp = th; + return NPC_LAYER4 | NPC_TCP; +} + /* * npf_cache_all: general routine to cache all relevant IP (v4 or v6) * and TCP, UDP or ICMP headers. @@ -601,9 +617,7 @@ again: switch (npc->npc_proto) { case IPPROTO_TCP: /* Cache: layer 4 - TCP. */ - npc->npc_l4.tcp = nbuf_advance(nbuf, hlen, - sizeof(struct tcphdr)); - l4flags = NPC_LAYER4 | NPC_TCP; + l4flags = npf_cache_tcp(npc, nbuf, hlen); break; case IPPROTO_UDP: /* Cache: layer 4 - UDP. */ diff --git a/sys/net/npf/npf_nat.c b/sys/net/npf/npf_nat.c index 7ae56c59490a..3103d379acf7 100644 --- a/sys/net/npf/npf_nat.c +++ b/sys/net/npf/npf_nat.c @@ -67,7 +67,7 @@ #ifdef _KERNEL #include -__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.48 2019/08/25 13:21:03 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.49 2020/05/23 19:56:00 rmind Exp $"); #include #include @@ -182,6 +182,7 @@ npf_nat_newpolicy(npf_t *npf, const nvlist_t *nat, npf_ruleset_t *rset) size_t len; np = kmem_zalloc(sizeof(npf_natpolicy_t), KM_SLEEP); + atomic_store_relaxed(&np->n_refcnt, 1); np->n_npfctx = npf; /* The translation type, flags and policy ID. 
*/ @@ -271,39 +272,55 @@ npf_nat_policyexport(const npf_natpolicy_t *np, nvlist_t *nat) return 0; } -/* - * npf_nat_freepolicy: free the NAT policy. - * - * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). - */ -void -npf_nat_freepolicy(npf_natpolicy_t *np) +static void +npf_natpolicy_release(npf_natpolicy_t *np) { - npf_conn_t *con; - npf_nat_t *nt; + KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); - /* - * Disassociate all entries from the policy. At this point, - * new entries can no longer be created for this policy. - */ - while (np->n_refcnt) { - mutex_enter(&np->n_lock); - LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { - con = nt->nt_conn; - KASSERT(con != NULL); - npf_conn_expire(con); - } - mutex_exit(&np->n_lock); - - /* Kick the worker - all references should be going away. */ - npf_worker_signal(np->n_npfctx); - kpause("npfgcnat", false, 1, NULL); + if (atomic_dec_uint_nv(&np->n_refcnt) != 0) { + return; } KASSERT(LIST_EMPTY(&np->n_nat_list)); mutex_destroy(&np->n_lock); kmem_free(np, sizeof(npf_natpolicy_t)); } +/* + * npf_nat_freepolicy: free the NAT policy. + * + * => Called from npf_rule_free() during the reload via npf_ruleset_destroy(). + * => At this point, NAT policy cannot acquire new references. + */ +void +npf_nat_freepolicy(npf_natpolicy_t *np) +{ + /* + * Drain the references. If there are active NAT connections, + * then expire them and kick the worker. + */ + if (atomic_load_relaxed(&np->n_refcnt) > 1) { + npf_nat_t *nt; + + mutex_enter(&np->n_lock); + LIST_FOREACH(nt, &np->n_nat_list, nt_entry) { + npf_conn_t *con = nt->nt_conn; + KASSERT(con != NULL); + npf_conn_expire(con); + } + mutex_exit(&np->n_lock); + npf_worker_signal(np->n_npfctx); + } + KASSERT(atomic_load_relaxed(&np->n_refcnt) >= 1); + + /* + * Drop the initial reference, but it might not be the last one. 
+ * If so, the last reference will be triggered via: + * + * npf_conn_destroy() -> npf_nat_destroy() -> npf_natpolicy_release() + */ + npf_natpolicy_release(np); +} + void npf_nat_freealg(npf_natpolicy_t *np, npf_alg_t *alg) { @@ -649,7 +666,7 @@ npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const int di) npf_recache(npc); } error = npf_nat_algo(npc, np, forw); - atomic_dec_uint(&np->n_refcnt); + npf_natpolicy_release(np); return error; } @@ -662,7 +679,7 @@ npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const int di) if (con == NULL) { ncon = npf_conn_establish(npc, di, true); if (ncon == NULL) { - atomic_dec_uint(&np->n_refcnt); + npf_natpolicy_release(np); return ENOMEM; } con = ncon; @@ -674,7 +691,7 @@ npf_do_nat(npf_cache_t *npc, npf_conn_t *con, const int di) */ nt = npf_nat_create(npc, np, con); if (nt == NULL) { - atomic_dec_uint(&np->n_refcnt); + npf_natpolicy_release(np); error = ENOMEM; goto out; } @@ -757,11 +774,15 @@ npf_nat_destroy(npf_nat_t *nt) } npf_stats_inc(np->n_npfctx, NPF_STAT_NAT_DESTROY); + /* + * Remove the connection from the list and drop the reference on + * the NAT policy. Note: this might trigger its destruction. 
+ */ mutex_enter(&np->n_lock); LIST_REMOVE(nt, nt_entry); - KASSERT(np->n_refcnt > 0); - atomic_dec_uint(&np->n_refcnt); mutex_exit(&np->n_lock); + npf_natpolicy_release(np); + pool_cache_put(nat_cache, nt); } @@ -772,10 +793,13 @@ void npf_nat_export(nvlist_t *condict, npf_nat_t *nt) { npf_natpolicy_t *np = nt->nt_natpolicy; + unsigned alen = nt->nt_alen; nvlist_t *nat; nat = nvlist_create(0); + nvlist_add_number(nat, "alen", alen); nvlist_add_binary(nat, "oaddr", &nt->nt_oaddr, sizeof(npf_addr_t)); + nvlist_add_binary(nat, "taddr", &nt->nt_taddr, alen); nvlist_add_number(nat, "oport", nt->nt_oport); nvlist_add_number(nat, "tport", nt->nt_tport); nvlist_add_number(nat, "nat-policy", np->n_id); @@ -791,9 +815,9 @@ npf_nat_import(npf_t *npf, const nvlist_t *nat, { npf_natpolicy_t *np; npf_nat_t *nt; - const void *oaddr; + const void *taddr, *oaddr; + size_t alen, len; uint64_t np_id; - size_t len; np_id = dnvlist_get_number(nat, "nat-policy", UINT64_MAX); if ((np = npf_ruleset_findnat(natlist, np_id)) == NULL) { @@ -802,12 +826,23 @@ npf_nat_import(npf_t *npf, const nvlist_t *nat, nt = pool_cache_get(nat_cache, PR_WAITOK); memset(nt, 0, sizeof(npf_nat_t)); + alen = dnvlist_get_number(nat, "alen", 0); + if (alen == 0 || alen > sizeof(npf_addr_t)) { + goto err; + } + + taddr = dnvlist_get_binary(nat, "taddr", &len, NULL, 0); + if (!taddr || len != alen) { + goto err; + } + memcpy(&nt->nt_taddr, taddr, sizeof(npf_addr_t)); + oaddr = dnvlist_get_binary(nat, "oaddr", &len, NULL, 0); - if (!oaddr || len != sizeof(npf_addr_t)) { - pool_cache_put(nat_cache, nt); - return NULL; + if (!oaddr || len != alen) { + goto err; } memcpy(&nt->nt_oaddr, oaddr, sizeof(npf_addr_t)); + nt->nt_oport = dnvlist_get_number(nat, "oport", 0); nt->nt_tport = dnvlist_get_number(nat, "tport", 0); @@ -817,8 +852,7 @@ npf_nat_import(npf_t *npf, const nvlist_t *nat, if (!npf_portmap_take(pm, nt->nt_alen, &nt->nt_taddr, nt->nt_tport)) { - pool_cache_put(nat_cache, nt); - return NULL; + goto err; } } 
npf_stats_inc(npf, NPF_STAT_NAT_CREATE); @@ -832,6 +866,9 @@ npf_nat_import(npf_t *npf, const nvlist_t *nat, np->n_refcnt++; LIST_INSERT_HEAD(&np->n_nat_list, nt, nt_entry); return nt; +err: + pool_cache_put(nat_cache, nt); + return NULL; } #if defined(DDB) || defined(_NPF_TESTING) diff --git a/usr.sbin/npf/npfctl/npf_build.c b/usr.sbin/npf/npfctl/npf_build.c index 20465aa2158c..f200dddb64a7 100644 --- a/usr.sbin/npf/npfctl/npf_build.c +++ b/usr.sbin/npf/npfctl/npf_build.c @@ -32,7 +32,7 @@ */ #include -__RCSID("$NetBSD: npf_build.c,v 1.53 2019/09/30 00:37:11 rmind Exp $"); +__RCSID("$NetBSD: npf_build.c,v 1.54 2020/05/23 19:56:00 rmind Exp $"); #include #define __FAVOR_BSD @@ -56,8 +56,9 @@ __RCSID("$NetBSD: npf_build.c,v 1.53 2019/09/30 00:37:11 rmind Exp $"); static nl_config_t * npf_conf = NULL; static bool npf_debug = false; static nl_rule_t * the_rule = NULL; +static bool npf_conf_built = false; -static bool defgroup = false; +static nl_rule_t * defgroup = NULL; static nl_rule_t * current_group[MAX_RULE_NESTING]; static unsigned rule_nesting_level = 0; static unsigned npfctl_tid_counter = 0; @@ -71,8 +72,31 @@ npfctl_config_init(bool debug) if (npf_conf == NULL) { errx(EXIT_FAILURE, "npf_config_create failed"); } - npf_debug = debug; memset(current_group, 0, sizeof(current_group)); + npf_debug = debug; + npf_conf_built = false; +} + +void +npfctl_config_build(void) +{ + /* Run-once. */ + if (npf_conf_built) { + return; + } + + /* + * The default group is mandatory. Note: npfctl_build_group_end() + * skipped the default rule, since it must be the last one. 
+ */ + if (!defgroup) { + errx(EXIT_FAILURE, "default group was not defined"); + } + assert(rule_nesting_level == 0); + npf_rule_insert(npf_conf, NULL, defgroup); + + npf_config_build(npf_conf); + npf_conf_built = true; } int @@ -81,9 +105,7 @@ npfctl_config_send(int fd) npf_error_t errinfo; int error = 0; - if (!defgroup) { - errx(EXIT_FAILURE, "default group was not defined"); - } + npfctl_config_build(); error = npf_config_submit(npf_conf, fd, &errinfo); if (error == EEXIST) { /* XXX */ errx(EXIT_FAILURE, "(re)load failed: " @@ -118,6 +140,8 @@ npfctl_config_save(nl_config_t *ncf, const char *outfile) void npfctl_config_debug(const char *outfile) { + npfctl_config_build(); + printf("\nConfiguration:\n\n"); _npf_config_dump(npf_conf, STDOUT_FILENO); @@ -593,7 +617,7 @@ npfctl_build_group(const char *name, int attr, const char *ifname, bool def) if (rule_nesting_level) { yyerror("default group can only be at the top level"); } - defgroup = true; + defgroup = rl; } /* Set the current group and increase the nesting level. */ @@ -613,7 +637,15 @@ npfctl_build_group_end(void) group = current_group[rule_nesting_level]; current_group[rule_nesting_level--] = NULL; - /* Note: if the parent is NULL, then it is a global rule. */ + /* + * Note: + * - If the parent is NULL, then it is a global rule. + * - The default rule must be the last, so it is inserted later. 
+ */ + if (group == defgroup) { + assert(parent == NULL); + return; + } npf_rule_insert(npf_conf, parent, group); } diff --git a/usr.sbin/npf/npfctl/npf_show.c b/usr.sbin/npf/npfctl/npf_show.c index 3cd263849049..eb188e1ec12a 100644 --- a/usr.sbin/npf/npfctl/npf_show.c +++ b/usr.sbin/npf/npfctl/npf_show.c @@ -34,7 +34,7 @@ */ #include -__RCSID("$NetBSD: npf_show.c,v 1.30 2019/11/01 13:58:32 christos Exp $"); +__RCSID("$NetBSD: npf_show.c,v 1.31 2020/05/23 19:56:00 rmind Exp $"); #include #define __FAVOR_BSD @@ -575,7 +575,7 @@ npfctl_config_show(int fd) print_linesep(ctx); } else { ncf = npfctl_config_ref(); - (void)npf_config_build(ncf); + npfctl_config_build(); loaded = true; } ctx->conf = ncf; diff --git a/usr.sbin/npf/npfctl/npfctl.h b/usr.sbin/npf/npfctl/npfctl.h index 66a424603a4f..7adca404a217 100644 --- a/usr.sbin/npf/npfctl/npfctl.h +++ b/usr.sbin/npf/npfctl/npfctl.h @@ -192,6 +192,7 @@ void npfctl_bpf_table(npf_bpf_t *, u_int, u_int); #define NPFCTL_NAT_STATIC 2 void npfctl_config_init(bool); +void npfctl_config_build(void); int npfctl_config_send(int); nl_config_t * npfctl_config_ref(void); int npfctl_config_show(int);