Make bpf MP-safe

With this change, bpf_mtap can run without taking any locks as long as
its bpf filter doesn't match a target packet. Pushing data into a bpf
buffer still needs a lock; removing that lock requires bigger changes
and is left as future work.
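
In practice the tap path only has to take a lock when a filter actually
matches. The fragment below is a rough sketch of that pattern, not the
literal bpf_mtap from this commit: catchpacket() stands in for the
existing copy-into-buffer routine and the argument plumbing is
simplified.

#include <sys/atomic.h>
#include <sys/mutex.h>
#include <sys/pserialize.h>
#include <sys/pslist.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>

/*
 * Sketch only: walk the per-interface descriptor list in a pserialize
 * read section, evaluate each filter with no lock held, and take the
 * per-descriptor buffer mutex only when the filter matched.
 */
static void
bpf_mtap_sketch(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
	struct bpf_d *d;
	int s;

	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(d, &bp->bif_dlist_head, struct bpf_d,
	    bd_bif_dlist_entry) {
		struct bpf_filter *filter = d->bd_filter;
		u_int slen;

		atomic_inc_ulong(&d->bd_rcount);

		/* A NULL filter accepts the whole packet. */
		slen = (filter == NULL) ? pktlen :
		    bpf_filter(filter->bf_insn, pkt, pktlen, pktlen);
		if (slen == 0)
			continue;

		/* Only a match needs the buffer lock. */
		mutex_enter(d->bd_buf_mtx);
		catchpacket(d, pkt, pktlen, slen, memcpy);
		mutex_exit(d->bd_buf_mtx);
	}
	pserialize_read_exit(s);
}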

Another known issue is that we have to keep some obsolete member
variables to avoid breaking kvm(3) users such as netstat and fstat.
One consequence for MP-ification is that the statistics counters in
struct bpf_d have to be updated with atomic operations. Once we retire
the kvm(3) users, we should make the counters per-CPU and remove the
atomic operations.
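
Concretely, for now each tap bumps the shared counters via
atomic_ops(3); the per-CPU flavour sketched next is what we would
switch to later. bd_rcount_pc is a made-up field name, used only to
illustrate percpu(9).

#include <sys/atomic.h>
#include <sys/percpu.h>

/* Today: bump the shared counter atomically so that kvm(3) readers
 * still see a coherent u_long in struct bpf_d. */
static inline void
bpf_count_packet(struct bpf_d *d)
{
	atomic_inc_ulong(&d->bd_rcount);
}

/* Later, once the kvm(3) users are retired: a per-CPU counter avoids
 * the shared cache line.  bd_rcount_pc (a percpu_t *) is hypothetical. */
static inline void
bpf_count_packet_percpu(struct bpf_d *d)
{
	uint64_t *cnt = percpu_getref(d->bd_rcount_pc);

	(*cnt)++;
	percpu_putref(d->bd_rcount_pc);
}
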
ozaki-r 2017-02-09 09:30:26 +00:00
parent 929132b4a7
commit f66c9ca3fd
3 changed files with 287 additions and 159 deletions

File diff suppressed because it is too large

sys/net/bpfdesc.h

@@ -1,4 +1,4 @@
/* $NetBSD: bpfdesc.h,v 1.43 2017/02/01 08:16:42 ozaki-r Exp $ */
/* $NetBSD: bpfdesc.h,v 1.44 2017/02/09 09:30:26 ozaki-r Exp $ */
/*
* Copyright (c) 1990, 1991, 1993
@@ -49,8 +49,15 @@
#include <sys/pslist.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/psref.h>
#endif
struct bpf_filter {
struct bpf_insn *bf_insn; /* filter code */
size_t bf_size;
bpfjit_func_t bf_jitcode; /* compiled filter program */
};
/*
* Descriptor associated with each open bpf file.
*/
@@ -76,7 +83,12 @@ struct bpf_d {
struct bpf_if * bd_bif; /* interface descriptor */
u_long bd_rtout; /* Read timeout in 'ticks' */
struct bpf_insn *bd_filter; /* filter code */
/* DEPRECATED. Keep it to avoid breaking kvm(3) users */
struct bpf_insn *_bd_filter; /* filter code */
/*
* XXX we should make the counters per-CPU once we retire kvm(3) users
* that directly access them.
*/
u_long bd_rcount; /* number of packets received */
u_long bd_dcount; /* number of packets dropped */
u_long bd_ccount; /* number of packets captured */
@@ -108,12 +120,14 @@ struct bpf_d {
#ifdef _LP64
int bd_compat32; /* 32-bit stream on LP64 system */
#endif
/* DEPRECATED. Keep it to avoid breaking kvm(3) users */
bpfjit_func_t bd_jitcode; /* compiled filter program */
size_t bd_filter_size;
struct bpf_filter *bd_filter;
#ifdef _KERNEL
struct pslist_entry bd_bif_dlist_entry; /* For bpf_if */
struct pslist_entry bd_bpf_dlist_entry; /* For the global list */
kmutex_t *bd_mtx;
kmutex_t *bd_buf_mtx;
kcondvar_t bd_cv;
#endif
};
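
bd_buf_mtx and bd_cv are what a blocking read now sleeps on, roughly
the usual mutex/condvar idiom sketched below. This is schematic only,
not the commit's actual bpf_read; bd_hbuf is the existing hold-buffer
pointer.

#include <sys/condvar.h>
#include <sys/mutex.h>

/*
 * Schematic: sleep on bd_cv under bd_buf_mtx until a hold buffer is
 * available; the capture side wakes bd_cv after rotating buffers.
 * The real bpf_read also handles non-blocking mode and close/reset.
 */
static int
bpf_wait_for_data(struct bpf_d *d)
{
	int error = 0;

	mutex_enter(d->bd_buf_mtx);
	while (d->bd_hbuf == NULL && error == 0)
		error = cv_timedwait_sig(&d->bd_cv, d->bd_buf_mtx,
		    d->bd_rtout);
	mutex_exit(d->bd_buf_mtx);

	return error;
}
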
@@ -160,6 +174,7 @@ struct bpf_if {
#ifdef _KERNEL
struct pslist_entry bif_iflist_entry;
struct pslist_head bif_dlist_head;
struct psref_target bif_psref;
#endif
};
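
The pslist entries plus bif_psref are what let lockless readers find a
bpf_if and keep it alive: traverse the list in a pserialize read
section, take a passive reference, then leave the section. A sketch of
that lookup pattern follows; the list head and psref class names are
illustrative, not necessarily the ones used in bpf.c.

#include <sys/pserialize.h>
#include <sys/pslist.h>
#include <sys/psref.h>

/*
 * Sketch only: find the bpf_if attached to ifp without taking a lock.
 * "bpf_iflist_head" and "bpf_psref_class" are illustrative names.
 * The caller drops the reference with psref_release() when finished.
 */
static struct bpf_if *
bpf_if_lookup_sketch(struct ifnet *ifp, struct psref *psref)
{
	struct bpf_if *bp;
	int s;

	s = pserialize_read_enter();
	PSLIST_READER_FOREACH(bp, &bpf_iflist_head, struct bpf_if,
	    bif_iflist_entry) {
		if (bp->bif_ifp == ifp) {
			psref_acquire(psref, &bp->bif_psref, bpf_psref_class);
			break;
		}
	}
	pserialize_read_exit(s);

	return bp;	/* NULL if no listener is attached to ifp */
}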

sys/net/if.c

@@ -1,4 +1,4 @@
/* $NetBSD: if.c,v 1.375 2017/01/25 03:04:21 christos Exp $ */
/* $NetBSD: if.c,v 1.376 2017/02/09 09:30:26 ozaki-r Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2008 The NetBSD Foundation, Inc.
@@ -90,7 +90,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.375 2017/01/25 03:04:21 christos Exp $");
__KERNEL_RCSID(0, "$NetBSD: if.c,v 1.376 2017/02/09 09:30:26 ozaki-r Exp $");
#if defined(_KERNEL_OPT)
#include "opt_inet.h"
@@ -774,13 +774,7 @@ if_percpuq_softint(void *arg)
while ((m = if_percpuq_dequeue(ipq)) != NULL) {
ifp->if_ipackets++;
#ifndef NET_MPSAFE
KERNEL_LOCK(1, NULL);
#endif
bpf_mtap(ifp, m);
#ifndef NET_MPSAFE
KERNEL_UNLOCK_ONE(NULL);
#endif
ifp->_if_input(ifp, m);
}
@@ -1072,13 +1066,7 @@ if_input(struct ifnet *ifp, struct mbuf *m)
KASSERT(!cpu_intr_p());
ifp->if_ipackets++;
#ifndef NET_MPSAFE
KERNEL_LOCK(1, NULL);
#endif
bpf_mtap(ifp, m);
#ifndef NET_MPSAFE
KERNEL_UNLOCK_ONE(NULL);
#endif
ifp->_if_input(ifp, m);
}