- sched_idle(): if a migration took place, rather than going idle briefly
  to avoid stealing back the LWP, remember the target CPU and avoid it.

- sched_preempted(): only choose a totally idle CPU; in the interests of
  overall throughput it's better to make use of SMT / slow CPUs.

Author: ad
Date: 2020-04-04 20:17:58 +00:00
Commit: dac58e129b
Parent: d8e96a2d4f
1 changed file with 31 additions and 23 deletions
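
The core of the sched_idle() change reads more easily outside the kernel: remember which CPU just received a migrated LWP and skip that CPU while scanning siblings for work to steal, rather than going idle briefly and possibly yanking the LWP straight back. The following is a simplified, stand-alone sketch of that idea; the toy_* types and functions are illustrative stand-ins, not NetBSD code.

#include <stdbool.h>
#include <stddef.h>

/* Illustrative stand-in for per-CPU scheduler state. */
struct toy_cpu {
	int		id;
	int		runnable;	/* runnable jobs queued here */
	struct toy_cpu	*sibling;	/* circular list of core siblings */
};

/* Push one job to "target" and report where it went (NULL if nowhere). */
static struct toy_cpu *
toy_idle_migrate(struct toy_cpu *self, struct toy_cpu *target)
{

	if (target == NULL || target == self)
		return NULL;
	target->runnable++;
	return target;
}

/* Steal one job from "tci" if it has a surplus. */
static bool
toy_steal(struct toy_cpu *self, struct toy_cpu *tci)
{

	if (tci->runnable > 1) {
		tci->runnable--;
		self->runnable++;
		return true;
	}
	return false;
}

/*
 * Idle-loop body: after feeding a job to another CPU, remember that
 * CPU ("mci") and avoid stealing from it on this pass.
 */
static void
toy_idle(struct toy_cpu *self, struct toy_cpu *migrate_to)
{
	struct toy_cpu *mci, *tci;

	mci = toy_idle_migrate(self, migrate_to);
	for (tci = self->sibling; tci != self; tci = tci->sibling) {
		if (tci == mci)
			continue;	/* skip the CPU we just fed */
		if (toy_steal(self, tci))
			return;
	}
}

The hunks below apply the same test against the remembered CPU (mci) both in the core-sibling scan and in the wider scan across the package.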

@@ -1,4 +1,4 @@
-/* $NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $ */
+/* $NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $ */
 /*-
  * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
  */
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $");
 #include "opt_dtrace.h"
@@ -340,17 +340,17 @@ sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
 	}
 	/*
-	 * Things start as soon as we touch ci_want_resched: x86 for example
-	 * has an instruction that monitors the memory cell it's in. We
-	 * want to drop the schedstate lock in advance, otherwise the remote
-	 * CPU can awaken and immediately block on the lock.
+	 * Things can start as soon as ci_want_resched is touched: x86 has
+	 * an instruction that monitors the memory cell it's in. Drop the
+	 * schedstate lock in advance, otherwise the remote CPU can awaken
+	 * and immediately block on the lock.
 	 */
 	if (__predict_true(unlock)) {
 		spc_unlock(ci);
 	}
 	/*
-	 * The caller will always have a second scheduler lock held: either
+	 * The caller almost always has a second scheduler lock held: either
 	 * the running LWP lock (spc_lwplock), or a sleep queue lock. That
 	 * keeps preemption disabled, which among other things ensures all
 	 * LWPs involved won't be freed while we're here (see lwp_dtor()).
@@ -361,8 +361,10 @@ sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock)
 		n = atomic_cas_uint(&ci->ci_want_resched, o, o | f);
 		if (__predict_true(o == n)) {
 			/*
-			 * We're the first. If we're in process context on
-			 * the same CPU, we can avoid the visit to trap().
+			 * We're the first to set a resched on the CPU. Try
+			 * to avoid causing a needless trip through trap()
+			 * to handle an AST fault, if it's known the LWP
+			 * will either block or go through userret() soon.
 			 */
 			if (l != curlwp || cpu_intr_p()) {
 				cpu_need_resched(ci, l, f);
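
The comment rewritten above sits on a compare-and-swap race: whichever caller first manages to set the resched flag is the one that pokes the CPU. Below is a rough user-space sketch of that first-setter pattern, using C11 atomics in place of the kernel's atomic_cas_uint(); the toy_* names and flag value are made up for illustration.

#include <stdatomic.h>
#include <stdbool.h>

#define TOY_RESCHED	0x01		/* illustrative flag value */

/*
 * Set "flag" in *want. Return true only for the caller that actually
 * set it first; later callers see it already set and back off, which
 * is how a needless second poke of the CPU is avoided.
 */
static bool
toy_post_resched(atomic_uint *want, unsigned flag)
{
	unsigned o = atomic_load_explicit(want, memory_order_relaxed);

	for (;;) {
		if ((o & flag) != 0)
			return false;	/* someone else got there first */
		if (atomic_compare_exchange_weak(want, &o, o | flag))
			return true;	/* we won; go poke the CPU */
		/* CAS failed: "o" was reloaded with the current value. */
	}
}

In the kernel, only the winner of the race goes on to call cpu_need_resched(), and the new comment spells out when even that call can be skipped.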
@@ -680,9 +682,10 @@ sched_catchlwp(struct cpu_info *ci)
 }
 /*
- * Called from sched_idle() to handle migration.
+ * Called from sched_idle() to handle migration. Return the CPU that we
+ * pushed the LWP to (may be NULL).
  */
-static void
+static struct cpu_info *
 sched_idle_migrate(void)
 {
 	struct cpu_info *ci = curcpu(), *tci = NULL;
@@ -748,13 +751,14 @@ sched_idle_migrate(void)
 		sched_resched_lwp(l, true);
 		/* tci now unlocked */
 		spc_unlock(ci);
-		return;
+		return tci;
 	}
 	if (dlock == true) {
 		KASSERT(tci != NULL);
 		spc_unlock(tci);
 	}
 	spc_unlock(ci);
+	return NULL;
 }
 /*
@@ -785,21 +789,22 @@ sched_steal(struct cpu_info *ci, struct cpu_info *tci)
 void
 sched_idle(void)
 {
-	struct cpu_info *ci = curcpu(), *inner, *outer, *first, *tci = NULL;
+	struct cpu_info *ci, *inner, *outer, *first, *tci, *mci;
 	struct schedstate_percpu *spc, *tspc;
 	struct lwp *l;
+	ci = curcpu();
 	spc = &ci->ci_schedstate;
+	tci = NULL;
+	mci = NULL;
 	/*
 	 * Handle LWP migrations off this CPU to another. If there a is
-	 * migration to do then go idle afterwards (we'll wake again soon),
-	 * as we don't want to instantly steal back the LWP we just moved
-	 * out.
+	 * migration to do then remember the CPU the LWP was sent to, and
+	 * don't steal the LWP back from that CPU below.
 	 */
 	if (spc->spc_migrating != NULL) {
-		sched_idle_migrate();
-		return;
+		mci = sched_idle_migrate();
 	}
 	/* If this CPU is offline, or we have an LWP to run, we're done. */
@@ -812,7 +817,7 @@ sched_idle(void)
 	/* Try to help our siblings out. */
 	tci = ci->ci_sibling[CPUREL_CORE];
 	while (tci != ci) {
-		if (sched_steal(ci, tci)) {
+		if (tci != mci && sched_steal(ci, tci)) {
 			return;
 		}
 		tci = tci->ci_sibling[CPUREL_CORE];
@@ -849,7 +854,8 @@ sched_idle(void)
 		do {
 			/* Don't hit the locks unless needed. */
 			tspc = &inner->ci_schedstate;
-			if (ci == inner || spc->spc_psid != tspc->spc_psid ||
+			if (ci == inner || ci == mci ||
+			    spc->spc_psid != tspc->spc_psid ||
 			    tspc->spc_mcount < min_catch) {
 				continue;
 			}
@@ -874,6 +880,7 @@ sched_idle(void)
 void
 sched_preempted(struct lwp *l)
 {
+	const int flags = SPCF_IDLE | SPCF_1STCLASS;
 	struct schedstate_percpu *tspc;
 	struct cpu_info *ci, *tci;
@@ -903,7 +910,6 @@ sched_preempted(struct lwp *l)
 	 */
 	tci = ci->ci_sibling[CPUREL_CORE];
 	while (tci != ci) {
-		const int flags = SPCF_IDLE | SPCF_1STCLASS;
 		tspc = &tci->ci_schedstate;
 		if ((tspc->spc_flags & flags) == flags &&
 		    sched_migratable(l, tci)) {
@@ -928,7 +934,9 @@ sched_preempted(struct lwp *l)
 	} else {
 		/*
 		 * Try to find a better CPU to take it, but don't move to
-		 * another 2nd class CPU; there's not much point.
+		 * another 2nd class CPU, and don't move to a non-idle CPU,
+		 * because that would prevent SMT being used to maximise
+		 * throughput.
 		 *
 		 * Search in the current CPU package in order to try and
 		 * keep L2/L3 cache locality, but expand to include the
@ -936,7 +944,7 @@ sched_preempted(struct lwp *l)
*/
tci = sched_bestcpu(l, l->l_cpu);
if (tci != ci &&
(tci->ci_schedstate.spc_flags & SPCF_1STCLASS) != 0) {
(tci->ci_schedstate.spc_flags & flags) == flags) {
l->l_target_cpu = tci;
}
}
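
This last hunk is the change described by the second bullet of the commit message: a candidate CPU must now be both first class and completely idle before the preempted LWP is retargeted at it. A minimal sketch of the old and new predicates, with made-up TOY_SPCF_* values standing in for NetBSD's SPCF_* flags:

#include <stdbool.h>

#define TOY_SPCF_IDLE		0x01	/* CPU is running its idle LWP */
#define TOY_SPCF_1STCLASS	0x02	/* fast / first-class CPU */

/* Old rule: any first-class CPU was acceptable. */
static bool
old_worth_moving(unsigned cand_flags)
{

	return (cand_flags & TOY_SPCF_1STCLASS) != 0;
}

/* New rule: the CPU must be first class and completely idle. */
static bool
new_worth_moving(unsigned cand_flags)
{
	const unsigned want = TOY_SPCF_IDLE | TOY_SPCF_1STCLASS;

	return (cand_flags & want) == want;
}

Requiring the idle bit keeps a busy first-class CPU from being handed extra work when an idle SMT sibling or a slower CPU could run the LWP instead, which is the throughput argument made in the updated comment.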