If the pagedaemon cannot free any memory because it is unable to
lock any uvm objects, check whether the lock holders are currently
on CPU and, if so, yield and retry very soon instead of assuming
deadlock.

This makes limited-memory kernels perform the same as memory-unlimited
kernels (provided there is a reasonable amount of memory available).
For example, for a large file copy off of ffs where the image is
backed by host memory (i.e. no disk i/o, i.e. ideal conditions),
the figures are, per rump kernel memory limit:

3000kB: same
1000kB: 10% slower
500kB:  50% slower

(the pagedaemon code might still be able to use some tweaking, though)
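
The core of the change is a plain try-lock / tiny-yield retry loop
instead of an immediate deadlock assumption.  As a rough illustration
only, here is a minimal user-space sketch of that pattern using
pthreads and nanosleep; it is not the rump kernel code, and the names
(obj_lock, worker) are made up for the example.  A worker thread holds
the "object lock" while it is busy on CPU, and the would-be reclaimer,
instead of giving up when its try-lock fails, sleeps for the smallest
possible time to yield the host CPU and tries again.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for an lwp that is busy on CPU while holding an object lock. */
static void *
worker(void *arg)
{
	volatile long spin;

	pthread_mutex_lock(&obj_lock);
	for (spin = 0; spin < 10000000L; spin++)
		continue;
	pthread_mutex_unlock(&obj_lock);
	return NULL;
}

int
main(void)
{
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 1 };	/* smallest possible sleep */
	pthread_t t;
	int tries = 0;

	pthread_create(&t, NULL, worker, NULL);

	/*
	 * Stand-in for the pagedaemon: if the try-lock fails, the
	 * holder is presumably still running, so yield briefly and
	 * retry instead of declaring deadlock.
	 */
	while (pthread_mutex_trylock(&obj_lock) == EBUSY) {
		nanosleep(&ts, NULL);
		tries++;
	}
	pthread_mutex_unlock(&obj_lock);
	pthread_join(t, NULL);

	printf("got the lock after %d retries\n", tries);
	return 0;
}

In the commit itself the same decision is driven by scanning ci_curlwp
on each virtual CPU in processpage() and by rumpuser_nanosleep() in
uvm_pageout(), as the diff below shows.
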
pooka 2010-12-01 20:29:56 +00:00
parent 0acc6b4268
commit 1de81dd955
2 changed files with 52 additions and 10 deletions

scheduler.c

@@ -1,4 +1,4 @@
- /* $NetBSD: scheduler.c,v 1.22 2010/11/21 22:01:15 pooka Exp $ */
+ /* $NetBSD: scheduler.c,v 1.23 2010/12/01 20:29:56 pooka Exp $ */
/*
* Copyright (c) 2010 Antti Kantee. All Rights Reserved.
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.22 2010/11/21 22:01:15 pooka Exp $");
__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.23 2010/12/01 20:29:56 pooka Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
@@ -342,6 +342,8 @@ rump_schedule_cpu_interlock(struct lwp *l, void *interlock)
l->l_cpu = l->l_target_cpu = rcpu->rcpu_ci;
l->l_mutex = rcpu->rcpu_ci->ci_schedstate.spc_mutex;
+ l->l_ncsw++;
+ rcpu->rcpu_ci->ci_curlwp = l;
}
void
@@ -407,6 +409,7 @@ rump_unschedule_cpu1(struct lwp *l, void *interlock)
void *old;
ci = l->l_cpu;
+ ci->ci_curlwp = NULL;
l->l_cpu = NULL;
rcpu = &rcpu_storage[ci-&rump_cpus[0]];

vm.c

@@ -1,4 +1,4 @@
- /* $NetBSD: vm.c,v 1.103 2010/12/01 11:19:18 pooka Exp $ */
+ /* $NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $ */
/*
* Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
@@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.103 2010/12/01 11:19:18 pooka Exp $");
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.104 2010/12/01 20:29:57 pooka Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
@@ -174,8 +174,9 @@ uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
KASSERT(anon == NULL);
pg = pool_cache_get(&pagecache, PR_NOWAIT);
- if (__predict_false(pg == NULL))
+ if (__predict_false(pg == NULL)) {
return NULL;
+ }
pg->offset = off;
pg->uobject = uobj;
@@ -866,7 +867,7 @@ uvm_pageout_done(int npages)
}
static bool
- processpage(struct vm_page *pg)
+ processpage(struct vm_page *pg, bool *lockrunning)
{
struct uvm_object *uobj;
@@ -882,6 +883,18 @@ processpage(struct vm_page *pg)
} else {
mutex_exit(&uobj->vmobjlock);
}
+ } else if (*lockrunning == false && ncpu > 1) {
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+ struct lwp *l;
+ l = mutex_owner(&uobj->vmobjlock);
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ if (ci->ci_curlwp == l) {
+ *lockrunning = true;
+ break;
+ }
+ }
}
return false;
@@ -899,6 +912,7 @@ uvm_pageout(void *arg)
int timo = 0;
int cleaned, skip, skipped;
bool succ = false;
+ bool lockrunning;
mutex_enter(&pdaemonmtx);
for (;;) {
@@ -913,8 +927,10 @@
}
succ = false;
- cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
- uvmexp.pdwoke++;
+ if (pdaemon_waiters == 0) {
+ cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
+ uvmexp.pdwoke++;
+ }
/* tell the world that we are hungry */
kernel_map->flags |= VM_MAP_WANTVA;
@@ -943,6 +959,7 @@
*/
cleaned = 0;
skip = 0;
+ lockrunning = false;
again:
mutex_enter(&uvm_pageqlock);
while (cleaned < PAGEDAEMON_OBJCHUNK) {
@@ -958,7 +975,7 @@
while (skipped++ < skip)
continue;
- if (processpage(pg)) {
+ if (processpage(pg, &lockrunning)) {
cleaned++;
goto again;
}
@@ -969,6 +986,28 @@
}
mutex_exit(&uvm_pageqlock);
+ /*
+ * Ok, someone is running with an object lock held.
+ * We want to yield the host CPU to make sure the
+ * thread is not parked on the host. Since sched_yield()
+ * doesn't appear to do anything on NetBSD, nanosleep
+ * for the smallest possible time and hope we're back in
+ * the game soon.
+ */
+ if (cleaned == 0 && lockrunning) {
+ uint64_t sec, nsec;
+ sec = 0;
+ nsec = 1;
+ rumpuser_nanosleep(&sec, &nsec, NULL);
+ lockrunning = false;
+ skip = 0;
+ /* and here we go again */
+ goto again;
+ }
/*
* And of course we need to reclaim the page cache
* again to actually release memory.
@@ -1012,7 +1051,7 @@
* Unfortunately, the wife just borrowed it.
*/
- if (!succ) {
+ if (!succ && cleaned == 0) {
rumpuser_dprintf("pagedaemoness: failed to reclaim "
"memory ... sleeping (deadlock?)\n");
timo = hz;