Start trying to reduce cache misses on vm_page during fault processing.

- Make PGO_LOCKED getpages imply PGO_NOBUSY and remove the latter.  Mark
  pages busy only when there's actually I/O to do.

- When doing COW on a uvm_object, don't mess with neighbouring pages.  In
  all likelihood they're already entered.

- Don't mess with neighbouring VAs that have existing mappings, as replacing
  those mappings with identical ones can be quite costly.

- Only enqueue pages for neighbour faults if they're not already enqueued, and
  don't activate centre pages unless uvmpdpol says it's useful (sketched below).

Also:

- Make PGO_LOCKED getpages on UAOs work more like vnodes: do gang lookup in
  the radix tree, and don't allocate new pages.

- Fix many assertion failures around faults/loans with tmpfs.
Author: ad
Date:   2020-05-17 19:38:16 +00:00
Commit: ff872804dc (parent 013f352853)

10 changed files with 267 additions and 317 deletions
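
The sketches that follow are editor-added condensations of the main changes, written against the code in the diffs below; the helper names are hypothetical and the bodies are trimmed, so treat them as illustrations rather than the committed functions.  First, the neighbour-VA handling (third bullet): uvm_fault_lower_lookup() now skips VAs that already have a mapping and only pays for pmap_update() when something was actually entered.  As the loop would read inside sys/uvm/uvm_fault.c (which defines struct uvm_faultctx and the static helper uvm_fault_lower_neighbor()):

/*
 * Hypothetical condensation of the neighbour scan in
 * uvm_fault_lower_lookup(); assumes it lives in uvm_fault.c.
 */
static void
scan_lower_neighbors(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
    struct vm_page **pages)
{
	vaddr_t currva = flt->startva;
	bool entered = false;

	for (int lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
		struct vm_page *curpg = pages[lcv];

		/* the centre page is handled by the main fault path. */
		if (curpg == NULL || curpg == PGO_DONTCARE ||
		    lcv == flt->centeridx) {
			continue;
		}
		/*
		 * don't touch VAs that already have a mapping:
		 * pmap_extract() is a cheap existence test, and replacing
		 * a mapping with an identical one can be quite costly.
		 */
		if (pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
			continue;
		}
		uvm_fault_lower_neighbor(ufi, flt, currva, curpg);
		entered = true;
	}
	/* one pmap_update() at the end, and only if a mapping was added. */
	if (entered) {
		pmap_update(ufi->orig_map->pmap);
	}
}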
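
Second, the queue handling (fourth bullet, "sketched below" above): neighbour pages are only enqueued when they aren't queued already and aren't wired, and the centre page is only re-activated when the page daemon policy says it's worth it.  The two wrapper functions here are hypothetical; the uvmpdpol_*() and uvm_page*() calls are the ones used in the uvm_fault.c hunks:

#include <sys/param.h>
#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/* neighbour page: make sure it's on a paging queue, nothing more. */
static void
fault_neighbor_enqueue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
		uvm_pagelock(pg);
		uvm_pageenqueue(pg);
		uvm_pageunlock(pg);
	}
}

/* centre page: activate only when the pdpolicy says it's worthwhile. */
static void
fault_center_activate(struct vm_page *pg)
{

	/*
	 * avoid re-activating the page unless needed, to avoid false
	 * sharing on multiprocessor (comment taken from the diff).
	 */
	if (uvmpdpol_pageactivate_p(pg)) {
		uvm_pagelock(pg);
		uvm_pageactivate(pg);
		uvm_pageunlock(pg);
	}
}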
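
Third, the PGO_LOCKED leg of uao_get() (first bullet under "Also"): resident pages are found with a gang lookup in the radix tree via uvm_page_array, nothing is allocated, and pages are not marked PG_BUSY.  A minimal stand-alone sketch under a hypothetical name, with error handling and the *npagesp out-parameter omitted; pps[] is assumed NULL-initialized by the caller, as in the fault path:

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm.h>
#include <uvm/uvm_page_array.h>

static int
uao_get_locked_sketch(struct uvm_object *uobj, voff_t offset,
    struct vm_page **pps, int maxpages, int centeridx, int flags)
{
	struct uvm_page_array a;
	struct vm_page *ptmp;
	bool done;
	int lcv;

	uvm_page_array_init(&a);
	for (lcv = 0; lcv < maxpages; lcv++) {
		/* batched radix tree lookup: peek the next resident page. */
		ptmp = uvm_page_array_fill_and_peek(&a, uobj,
		    offset + (lcv << PAGE_SHIFT), maxpages, 0);
		if (ptmp == NULL) {
			break;
		}
		/* the lookup may skip holes; recompute the slot index. */
		lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
		if (lcv >= maxpages) {
			break;
		}
		uvm_page_array_advance(&a);
		/* busy pages need a wait or I/O: leave them to the slow path. */
		if ((ptmp->flags & PG_BUSY) != 0) {
			continue;
		}
		pps[lcv] = ptmp;
	}
	uvm_page_array_fini(&a);

	/* EBUSY tells the fault code to retry via the unlocked path. */
	if ((flags & PGO_ALLPAGES) != 0) {
		done = true;
		for (int i = 0; i < maxpages; i++) {
			done &= (pps[i] != NULL);
		}
	} else {
		done = (pps[centeridx] != NULL);
	}
	return done ? 0 : EBUSY;
}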

View File

@ -1,4 +1,4 @@
/* $NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $ */
/* $NetBSD: genfs_io.c,v 1.96 2020/05/17 19:38:16 ad Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.96 2020/05/17 19:38:16 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -128,12 +128,12 @@ genfs_getpages(void *v)
/*
* the object must be locked. it can only be a read lock when
* processing a read fault with PGO_LOCKED | PGO_NOBUSY.
* processing a read fault with PGO_LOCKED.
*/
KASSERT(rw_lock_held(uobj->vmobjlock));
KASSERT(rw_write_held(uobj->vmobjlock) ||
((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 && !memwrite));
((flags & PGO_LOCKED) != 0 && !memwrite));
#ifdef DIAGNOSTIC
if ((flags & PGO_JOURNALLOCKED) && vp->v_mount->mnt_wapbl)
@ -237,9 +237,8 @@ startover:
#endif /* defined(DEBUG) */
nfound = uvn_findpages(uobj, origoffset, &npages,
ap->a_m, NULL,
UFP_NOWAIT | UFP_NOALLOC |
(memwrite ? UFP_NORDONLY : 0) |
((flags & PGO_NOBUSY) != 0 ? UFP_NOBUSY : 0));
UFP_NOWAIT | UFP_NOALLOC | UFP_NOBUSY |
(memwrite ? UFP_NORDONLY : 0));
KASSERT(npages == *ap->a_count);
if (nfound == 0) {
error = EBUSY;
@ -250,10 +249,6 @@ startover:
* the file behind us.
*/
if (!genfs_node_rdtrylock(vp)) {
if ((flags & PGO_NOBUSY) == 0) {
genfs_rel_pages(ap->a_m, npages);
}
/*
* restore the array.
*/

View File

@ -1,4 +1,4 @@
/* $NetBSD: nfs_bio.c,v 1.196 2020/04/23 21:47:08 ad Exp $ */
/* $NetBSD: nfs_bio.c,v 1.197 2020/05/17 19:38:16 ad Exp $ */
/*
* Copyright (c) 1989, 1993
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.196 2020/04/23 21:47:08 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.197 2020/05/17 19:38:16 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_nfs.h"
@ -1260,7 +1260,6 @@ nfs_getpages(void *v)
bool v3 = NFS_ISV3(vp);
bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
bool locked = (ap->a_flags & PGO_LOCKED) != 0;
bool nobusy = (ap->a_flags & PGO_NOBUSY);
/*
* XXX NFS wants to modify the pages below and that can't be done
@ -1348,14 +1347,10 @@ nfs_getpages(void *v)
if (!mutex_tryenter(&np->n_commitlock)) {
/*
* Since PGO_LOCKED is set, we need to unbusy
* all pages fetched by genfs_getpages() above,
* tell the caller that there are no pages
* available and put back original pgs array.
*/
if (nobusy == false)
uvm_page_unbusy(pgs, npages);
*ap->a_count = 0;
memcpy(pgs, opgs,
npages * sizeof(struct vm_pages *));

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_aobj.c,v 1.140 2020/05/15 22:27:04 ad Exp $ */
/* $NetBSD: uvm_aobj.c,v 1.141 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.140 2020/05/15 22:27:04 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.141 2020/05/17 19:38:17 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
@ -250,6 +250,8 @@ uao_find_swslot(struct uvm_object *uobj, int pageidx)
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
struct uao_swhash_elt *elt;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
/*
* if noswap flag is set, then we never return a slot
*/
@ -293,6 +295,7 @@ uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
(uintptr_t)aobj, pageidx, slot, 0);
KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
/*
* if noswap flag is set, then we can't set a non-zero slot.
@ -365,6 +368,7 @@ uao_free(struct uvm_aobj *aobj)
{
struct uvm_object *uobj = &aobj->u_obj;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_write_held(uobj->vmobjlock));
uao_dropswap_range(uobj, 0, 0);
rw_exit(uobj->vmobjlock);
@ -665,6 +669,7 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
voff_t curoff;
UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_write_held(uobj->vmobjlock));
if (flags & PGO_ALLPAGES) {
@ -808,13 +813,13 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
/*
* the object must be locked. it can only be a read lock when
* processing a read fault with PGO_LOCKED | PGO_NOBUSY.
* processing a read fault with PGO_LOCKED.
*/
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_lock_held(uobj->vmobjlock));
KASSERT(rw_write_held(uobj->vmobjlock) ||
((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 &&
(access_type & VM_PROT_WRITE) == 0));
((flags & PGO_LOCKED) != 0 && (access_type & VM_PROT_WRITE) == 0));
/*
* get number of pages
@ -827,7 +832,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
*/
if (flags & PGO_LOCKED) {
krw_t lktype = rw_lock_op(uobj->vmobjlock);
struct uvm_page_array a;
/*
* step 1a: get pages that are already resident. only do
@ -835,77 +840,56 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
* time through).
*/
uvm_page_array_init(&a);
done = true; /* be optimistic */
gotpages = 0; /* # of pages we got so far */
for (lcv = 0, current_offset = offset ; lcv < maxpages ;
lcv++, current_offset += PAGE_SIZE) {
/* do we care about this page? if not, skip it */
if (pps[lcv] == PGO_DONTCARE)
continue;
ptmp = uvm_pagelookup(uobj, current_offset);
/*
* if page is new, attempt to allocate the page,
* zero-fill'd. we can only do this if the caller
* holds a write lock.
*/
if (ptmp == NULL && lktype == RW_WRITER &&
uao_find_swslot(uobj,
current_offset >> PAGE_SHIFT) == 0) {
ptmp = uao_pagealloc(uobj, current_offset,
UVM_FLAG_COLORMATCH|UVM_PGA_ZERO);
if (ptmp) {
/* new page */
ptmp->flags &= ~(PG_FAKE);
uvm_pagemarkdirty(ptmp,
UVM_PAGE_STATUS_UNKNOWN);
if ((flags & PGO_NOBUSY) != 0)
ptmp->flags &= ~PG_BUSY;
goto gotpage;
}
for (lcv = 0; lcv < maxpages; lcv++) {
ptmp = uvm_page_array_fill_and_peek(&a, uobj,
offset + (lcv << PAGE_SHIFT), maxpages, 0);
if (ptmp == NULL) {
break;
}
KASSERT(ptmp->offset >= offset);
lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
if (lcv >= maxpages) {
break;
}
uvm_page_array_advance(&a);
/*
* to be useful must get a non-busy page
*/
if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
if (lcv == centeridx ||
(flags & PGO_ALLPAGES) != 0)
/* need to do a wait or I/O! */
done = false;
if ((ptmp->flags & PG_BUSY) != 0) {
continue;
}
/*
* useful page: busy/lock it and plug it in our
* result array
* useful page: plug it in our result array
*/
KASSERT(uvm_pagegetdirty(ptmp) !=
UVM_PAGE_STATUS_CLEAN);
if ((flags & PGO_NOBUSY) == 0) {
/* caller must un-busy this page */
ptmp->flags |= PG_BUSY;
UVM_PAGE_OWN(ptmp, "uao_get1");
}
gotpage:
pps[lcv] = ptmp;
gotpages++;
}
uvm_page_array_fini(&a);
/*
* step 1b: now we've either done everything needed or we need
* to unlock and do some waiting or I/O.
*/
if ((flags & PGO_ALLPAGES) != 0) {
for (int i = 0; i < maxpages; i++) {
done &= (pps[i] != NULL);
}
} else {
done = (pps[centeridx] != NULL);
}
UVMHIST_LOG(pdhist, "<- done (done=%jd)", done, 0,0,0);
*npagesp = gotpages;
if (done)
return 0;
else
return EBUSY;
return done ? 0 : EBUSY;
}
/*
@ -1117,6 +1101,8 @@ uao_dropswap(struct uvm_object *uobj, int pageidx)
{
int slot;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
slot = uao_set_swslot(uobj, pageidx, 0);
if (slot) {
uvm_swap_free(slot, 1);
@ -1340,6 +1326,7 @@ uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
int swpgonlydelta = 0;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_write_held(uobj->vmobjlock));
if (end == 0) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_fault.c,v 1.226 2020/05/15 22:35:05 ad Exp $ */
/* $NetBSD: uvm_fault.c,v 1.227 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.226 2020/05/15 22:35:05 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.227 2020/05/17 19:38:17 ad Exp $");
#include "opt_uvmhist.h"
@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.226 2020/05/15 22:35:05 ad Exp $");
#include <sys/mman.h>
#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
/*
*
@ -569,11 +570,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
} else if (uobjpage != PGO_DONTCARE) {
/* object-backed COW */
opg = uobjpage;
if ((uobjpage->flags & PG_BUSY) != 0) {
KASSERT(rw_write_held(opg->uobject->vmobjlock));
} else {
KASSERT(rw_read_held(opg->uobject->vmobjlock));
}
KASSERT(rw_lock_held(opg->uobject->vmobjlock));
} else {
/* ZFOD */
opg = NULL;
@ -627,10 +624,6 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
}
/* unlock and fail ... */
if (uobjpage != PGO_DONTCARE &&
(uobjpage->flags & PG_BUSY) != 0) {
uvm_page_unbusy(&uobjpage, 1);
}
uvmfault_unlockall(ufi, amap, uobj);
if (!uvm_reclaimable()) {
UVMHIST_LOG(maphist, "out of VM", 0,0,0,0);
@ -655,6 +648,17 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon,
oanon != NULL);
/*
* from this point on am_lock won't be dropped until the page is
* entered, so it's safe to unbusy the page up front.
*
* uvm_fault_{upper,lower}_done will activate or enqueue the page.
*/
pg = anon->an_page;
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
*nanon = anon;
error = 0;
done:
@ -1088,6 +1092,17 @@ uvm_fault_check(
return EFAULT;
}
/*
* for a case 2B fault waste no time on adjacent pages because
* they are likely already entered.
*/
if (uobj != NULL && amap != NULL &&
(flt->access_type & VM_PROT_WRITE) != 0) {
/* wide fault (!narrow) */
flt->narrow = true;
}
/*
* establish range of interest based on advice from mapper
* and then clip to fit map entry. note that we only want
@ -1338,14 +1353,6 @@ uvm_fault_upper_lookup(
UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed,
(ufi->entry->object.uvm_obj && shadowed != false),0,0);
/*
* note that if we are really short of RAM we could sleep in the above
* call to pmap_enter with everything locked. bad?
*
* XXX Actually, that is bad; pmap_enter() should just fail in that
* XXX case. --thorpej
*/
return 0;
}
@ -1370,17 +1377,16 @@ uvm_fault_upper_neighbor(
KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
/*
* in the read-locked case, it's not possible for this to be a new
* page, therefore it's enqueued already. there wasn't a direct
* fault on the page, so avoid the cost of re-enqueuing it unless
* write-locked.
* there wasn't a direct fault on the page, so avoid the cost of
* activating it.
*/
if (flt->upper_lock_type == RW_WRITER) {
if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
}
UVMHIST_LOG(maphist,
" MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@ -1615,13 +1621,10 @@ uvm_fault_upper_promote(
default:
return error;
}
pg = anon->an_page;
KASSERT(anon->an_lock == oanon->an_lock);
/* uvm_fault_upper_done will activate or enqueue the page */
pg = anon->an_page;
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
KASSERT((pg->flags & (PG_BUSY | PG_FAKE)) == 0);
/* deref: can not drop to zero here by defn! */
KASSERT(oanon->an_ref > 1);
@ -1714,11 +1717,9 @@ uvm_fault_upper_enter(
* we just promoted.
*/
if (flt->upper_lock_type == RW_WRITER) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
}
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
/*
* No need to undo what we did; we can simply think of
@ -1768,15 +1769,11 @@ uvm_fault_upper_done(
* ... update the page queues.
*/
uvm_pagelock(pg);
if (wire_paging) {
uvm_pagelock(pg);
uvm_pagewire(pg);
} else {
uvm_pageactivate(pg);
}
uvm_pageunlock(pg);
uvm_pageunlock(pg);
if (wire_paging) {
/*
* since the now-wired page cannot be paged out,
* release its swap resources for others to use.
@ -1786,6 +1783,15 @@ uvm_fault_upper_done(
uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
uvm_anon_dropswap(anon);
} else if (uvmpdpol_pageactivate_p(pg)) {
/*
* avoid re-activating the page unless needed,
* to avoid false sharing on multiprocessor.
*/
uvm_pagelock(pg);
uvm_pageactivate(pg);
uvm_pageunlock(pg);
}
}
@ -1808,7 +1814,6 @@ uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
*/
if (__predict_true(flt->lower_lock_type == RW_WRITER)) {
KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0);
return 0;
}
@ -1827,18 +1832,6 @@ uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
}
cpu_count(CPU_COUNT_FLTUP, 1);
KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
/*
* finally, if a page was supplied, assert that it's not busy
* (can't be with a reader lock) and then mark it busy now that
* we have a writer lock.
*/
if (uobjpage != NULL) {
KASSERT((uobjpage->flags & PG_BUSY) == 0);
uobjpage->flags |= PG_BUSY;
UVM_PAGE_OWN(uobjpage, "upgrdlwr");
}
return 0;
}
@ -1899,17 +1892,8 @@ uvm_fault_lower(
*/
KASSERT(amap == NULL ||
rw_lock_op(amap->am_lock) == flt->upper_lock_type);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
KASSERTMSG(uobjpage == NULL ||
(uobjpage->flags & PG_BUSY) != 0,
"page %p should be busy", uobjpage);
} else {
KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
KASSERTMSG(uobjpage == NULL ||
(uobjpage->flags & PG_BUSY) == 0,
"page %p should not be busy", uobjpage);
}
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* note that uobjpage can not be PGO_DONTCARE at this point. we now
@ -1952,13 +1936,8 @@ uvm_fault_lower(
*/
KASSERT(amap == NULL ||
rw_lock_op(amap->am_lock) == flt->upper_lock_type);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
} else {
KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
}
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* notes:
@ -1966,10 +1945,12 @@ uvm_fault_lower(
* - at this point uobjpage can not be PG_RELEASED (since we checked
* for it above)
* - at this point uobjpage could be waited on (handle later)
* - uobjpage can be from a different object if tmpfs (vnode vs UAO)
*/
KASSERT(uobjpage != NULL);
KASSERT(uobj == NULL || uobj == uobjpage->uobject);
KASSERT(uobj == NULL ||
uobjpage->uobject->vmobjlock == uobj->vmobjlock);
KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) ||
uvm_pagegetdirty(uobjpage) == UVM_PAGE_STATUS_CLEAN);
@ -1997,16 +1978,13 @@ uvm_fault_lower_lookup(
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
int lcv, gotpages;
vaddr_t currva;
bool entered;
UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist);
rw_enter(uobj->vmobjlock, flt->lower_lock_type);
/*
* Locked: maps(read), amap(if there), uobj
*
* if we have a read lock on the object, do a PGO_NOBUSY get, which
* will return us pages with PG_BUSY clear. if a write lock is held
* pages will be returned with PG_BUSY set.
*/
cpu_count(CPU_COUNT_FLTLGET, 1);
@ -2015,7 +1993,7 @@ uvm_fault_lower_lookup(
ufi->entry->offset + flt->startva - ufi->entry->start,
pages, &gotpages, flt->centeridx,
flt->access_type & MASK(ufi->entry), ufi->entry->advice,
PGO_LOCKED | (flt->lower_lock_type == RW_WRITER ? 0 : PGO_NOBUSY));
PGO_LOCKED);
KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
@ -2028,6 +2006,7 @@ uvm_fault_lower_lookup(
return;
}
entered = false;
currva = flt->startva;
for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
struct vm_page *curpg;
@ -2036,32 +2015,31 @@ uvm_fault_lower_lookup(
if (curpg == NULL || curpg == PGO_DONTCARE) {
continue;
}
KASSERT(curpg->uobject == uobj);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(rw_write_held(uobj->vmobjlock));
KASSERTMSG((curpg->flags & PG_BUSY) != 0,
"page %p should be busy", curpg);
} else {
KASSERT(rw_read_held(uobj->vmobjlock));
KASSERTMSG((curpg->flags & PG_BUSY) == 0,
"page %p should not be busy", curpg);
}
/*
* if center page is resident and not PG_BUSY|PG_RELEASED
* and !PGO_NOBUSY, then pgo_get made it PG_BUSY for us and
* gave us a handle to it.
* in the case of tmpfs, the pages might be from a different
* uvm_object. just make sure that they have the same lock.
*/
KASSERT(curpg->uobject->vmobjlock == uobj->vmobjlock);
KASSERT((curpg->flags & PG_BUSY) == 0);
/*
* leave the centre page for later. don't screw with
* existing mappings (needless & expensive).
*/
if (lcv == flt->centeridx) {
UVMHIST_LOG(maphist, " got uobjpage (%#jx) "
"with locked get", (uintptr_t)curpg, 0, 0, 0);
} else {
} else if (!pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
uvm_fault_lower_neighbor(ufi, flt, currva, curpg);
entered = true;
}
}
pmap_update(ufi->orig_map->pmap);
if (entered) {
pmap_update(ufi->orig_map->pmap);
}
}
/*
@ -2082,20 +2060,17 @@ uvm_fault_lower_neighbor(
* calling pgo_get with PGO_LOCKED returns us pages which
* are neither busy nor released, so we don't need to check
* for this. we can just directly enter the pages.
*/
/*
* in the read-locked case, it's not possible for this to be a new
* page. it must be cached with the object and enqueued already.
*
* there wasn't a direct fault on the page, so avoid the cost of
* re-enqueuing it.
* activating it.
*/
if (flt->lower_lock_type == RW_WRITER) {
if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
}
UVMHIST_LOG(maphist,
" MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@ -2112,20 +2087,7 @@ uvm_fault_lower_neighbor(
KASSERT((pg->flags & PG_RELEASED) == 0);
KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
/*
* if a write lock was held on the object, the pages have been
* busied. unbusy them now, as we are about to enter and then
* forget about them.
*/
if (flt->lower_lock_type == RW_WRITER) {
KASSERT((pg->flags & PG_BUSY) != 0);
pg->flags &= ~(PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
} else {
KASSERT((pg->flags & PG_BUSY) == 0);
}
KASSERT((pg->flags & PG_BUSY) == 0);
KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type);
const vm_prot_t mapprot =
@ -2253,34 +2215,38 @@ uvm_fault_lower_io(
}
/*
* didn't get the lock? release the page and retry.
* unbusy/release the page.
*/
if ((pg->flags & PG_RELEASED) == 0) {
pg->flags &= ~PG_BUSY;
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
UVM_PAGE_OWN(pg, NULL);
} else {
cpu_count(CPU_COUNT_FLTPGRELE, 1);
uvm_pagefree(pg);
}
/*
* didn't get the lock? retry.
*/
if (locked == false) {
UVMHIST_LOG(maphist,
" wasn't able to relock after fault: retry",
0,0,0,0);
if ((pg->flags & PG_RELEASED) == 0) {
pg->flags &= ~PG_BUSY;
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
UVM_PAGE_OWN(pg, NULL);
} else {
cpu_count(CPU_COUNT_FLTPGRELE, 1);
uvm_pagefree(pg);
}
rw_exit(uobj->vmobjlock);
return ERESTART;
}
/*
* we have the data in pg which is busy and
* not released. we are holding object lock (so the page
* we have the data in pg. we are holding object lock (so the page
* can't be released on us).
*/
/* locked: maps(read), amap(if !null), uobj, pg */
/* locked: maps(read), amap(if !null), uobj */
*ruobj = uobj;
*ruobjpage = pg;
@ -2328,12 +2294,7 @@ uvm_fault_lower_direct(
uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage);
}
KASSERT(pg == uobjpage);
if (flt->lower_lock_type == RW_READER) {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
} else {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
}
KASSERT((pg->flags & PG_BUSY) == 0);
return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
}
@ -2375,16 +2336,6 @@ uvm_fault_lower_direct_loan(
pg = uvm_loanbreak(uobjpage);
if (pg == NULL) {
/*
* drop ownership of page, it can't be released
*/
uvm_pagelock(uobjpage);
uvm_pagewakeup(uobjpage);
uvm_pageunlock(uobjpage);
uobjpage->flags &= ~PG_BUSY;
UVM_PAGE_OWN(uobjpage, NULL);
uvmfault_unlockall(ufi, amap, uobj);
UVMHIST_LOG(maphist,
" out of RAM breaking loan, waiting",
@ -2395,6 +2346,17 @@ uvm_fault_lower_direct_loan(
}
*rpg = pg;
*ruobjpage = pg;
/*
* drop ownership of page while still holding object lock,
* which won't be dropped until the page is entered.
*/
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
pg->flags &= ~PG_BUSY;
UVM_PAGE_OWN(pg, NULL);
}
return 0;
}
@ -2426,6 +2388,8 @@ uvm_fault_lower_promote(
return error;
}
KASSERT(rw_write_held(amap->am_lock));
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* If we are going to promote the data to an anon we
@ -2446,11 +2410,6 @@ uvm_fault_lower_promote(
/*
* Fill in the data.
*/
if (flt->lower_lock_type == RW_READER) {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
} else {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
}
if (uobjpage != PGO_DONTCARE) {
cpu_count(CPU_COUNT_FLT_PRCOPY, 1);
@ -2467,19 +2426,6 @@ uvm_fault_lower_promote(
*/
}
/*
* dispose of uobjpage. it can't be PG_RELEASED
* since we still hold the object lock.
*/
if ((uobjpage->flags & PG_BUSY) != 0) {
uobjpage->flags &= ~PG_BUSY;
uvm_pagelock(uobjpage);
uvm_pagewakeup(uobjpage);
uvm_pageunlock(uobjpage);
UVM_PAGE_OWN(uobjpage, NULL);
}
UVMHIST_LOG(maphist,
" promote uobjpage %#jx to anon/page %#jx/%#jx",
(uintptr_t)uobjpage, (uintptr_t)anon, (uintptr_t)pg, 0);
@ -2525,18 +2471,20 @@ uvm_fault_lower_enter(
*
* Note: pg is either the uobjpage or the new page in the new anon.
*/
KASSERT(amap == NULL ||
rw_lock_op(amap->am_lock) == flt->upper_lock_type);
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
KASSERTMSG((pg->flags & PG_BUSY) != 0,
"page %p should be busy", pg);
} else {
KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
KASSERTMSG(anon != NULL || (pg->flags & PG_BUSY) == 0,
"page %p should not be busy", pg);
}
/*
* note that pg can't be PG_RELEASED or PG_BUSY since we did
* not drop the object lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
KASSERT((pg->flags & PG_BUSY) == 0);
/*
* all resources are present. we can now map it in and free our
@ -2573,23 +2521,11 @@ uvm_fault_lower_enter(
* we just promoted the page.
*/
if (anon != NULL || flt->lower_lock_type == RW_WRITER) {
if (anon != NULL) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
} else {
KASSERT((pg->flags & PG_BUSY) == 0);
}
/*
* note that pg can't be PG_RELEASED since we did not drop
* the object lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
if ((pg->flags & PG_BUSY) != 0) {
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
}
uvmfault_unlockall(ufi, amap, uobj);
@ -2606,20 +2542,6 @@ uvm_fault_lower_enter(
}
uvm_fault_lower_done(ufi, flt, uobj, pg);
/*
* note that pg can't be PG_RELEASED since we did not drop the object
* lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
if ((pg->flags & PG_BUSY) != 0) {
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
}
pmap_update(ufi->orig_map->pmap);
uvmfault_unlockall(ufi, amap, uobj);
@ -2636,13 +2558,13 @@ uvm_fault_lower_done(
struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
struct uvm_object *uobj, struct vm_page *pg)
{
bool dropswap = false;
UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist);
uvm_pagelock(pg);
if (flt->wire_paging) {
uvm_pagelock(pg);
uvm_pagewire(pg);
uvm_pageunlock(pg);
if (pg->flags & PG_AOBJ) {
/*
@ -2650,19 +2572,26 @@ uvm_fault_lower_done(
* release its swap resources for others to use.
* since an aobj page with no swap cannot be clean,
* mark it dirty now.
*
* use pg->uobject here. if the page is from a
* tmpfs vnode, the pages are backed by its UAO and
* not the vnode.
*/
KASSERT(uobj != NULL);
KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
dropswap = true;
uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
}
} else {
uvm_pageactivate(pg);
}
uvm_pageunlock(pg);
} else if (uvmpdpol_pageactivate_p(pg)) {
/*
* avoid re-activating the page unless needed,
* to avoid false sharing on multiprocessor.
*/
if (dropswap) {
uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
uvm_pagelock(pg);
uvm_pageactivate(pg);
uvm_pageunlock(pg);
}
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $ */
/* $NetBSD: uvm_loan.c,v 1.101 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.101 2020/05/17 19:38:17 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -109,7 +109,7 @@ static int uvm_loanuobj(struct uvm_faultinfo *, void ***,
static int uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void uvm_unloananon(struct vm_anon **, int);
static void uvm_unloanpage(struct vm_page **, int);
static int uvm_loanpage(struct vm_page **, int);
static int uvm_loanpage(struct vm_page **, int, bool);
/*
@ -442,12 +442,11 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
*
* => pages should be object-owned and the object should be locked.
* => in the case of error, the object might be unlocked and relocked.
* => caller should busy the pages beforehand.
* => pages will be unbusied.
* => pages will be unbusied (if busied is true).
* => fail with EBUSY if meet a wired page.
*/
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
{
int i;
int error = 0;
@ -461,7 +460,7 @@ uvm_loanpage(struct vm_page **pgpp, int npages)
KASSERT(pg->uobject == pgpp[0]->uobject);
KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
KASSERT(rw_write_held(pg->uobject->vmobjlock));
KASSERT(pg->flags & PG_BUSY);
KASSERT(busied == ((pg->flags & PG_BUSY) != 0));
if (pg->wire_count > 0) {
UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
@ -479,7 +478,9 @@ uvm_loanpage(struct vm_page **pgpp, int npages)
uvm_pageunlock(pg);
}
uvm_page_unbusy(pgpp, npages);
if (busied) {
uvm_page_unbusy(pgpp, npages);
}
if (error) {
/*
@ -553,7 +554,7 @@ reget:
if (slock) {
KASSERT(npendloan > 0);
error = uvm_loanpage(pgpp - npendloan,
npendloan);
npendloan, true);
rw_exit(slock);
if (error)
goto fail;
@ -587,7 +588,7 @@ reget:
}
KASSERT(slock != NULL);
KASSERT(npendloan > 0);
error = uvm_loanpage(pgpp - npendloan, npendloan);
error = uvm_loanpage(pgpp - npendloan, npendloan, true);
rw_exit(slock);
if (error)
goto fail;
@ -702,36 +703,45 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
}
/*
* didn't get the lock? release the page and retry.
* unbusy the page.
*/
if (locked == false) {
if (pg->flags & PG_RELEASED) {
uvm_pagefree(pg);
rw_exit(uobj->vmobjlock);
return (0);
}
if ((pg->flags & PG_RELEASED) == 0) {
uvm_pagelock(pg);
uvm_pageactivate(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
pg->flags &= ~PG_BUSY;
UVM_PAGE_OWN(pg, NULL);
}
/*
* didn't get the lock? release the page and retry.
*/
if (locked == false) {
if (pg->flags & PG_RELEASED) {
uvm_pagefree(pg);
}
rw_exit(uobj->vmobjlock);
return (0);
}
}
KASSERT(uobj == pg->uobject);
/*
* for tmpfs vnodes, the page will be from a UAO rather than
* the vnode. just check the locks match.
*/
KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
/*
* at this point we have the page we want ("pg") marked PG_BUSY for us
* and we have all data structures locked. do the loanout. page can
* not be PG_RELEASED (we caught this above).
* at this point we have the page we want ("pg") and we have
* all data structures locked. do the loanout. page can not
* be PG_RELEASED (we caught this above).
*/
if ((flags & UVM_LOAN_TOANON) == 0) {
if (uvm_loanpage(&pg, 1)) {
if (uvm_loanpage(&pg, 1, false)) {
uvmfault_unlockall(ufi, amap, uobj);
return (-1);
}
@ -1099,7 +1109,7 @@ uvm_loan_init(void)
* uvm_loanbreak: break loan on a uobj page
*
* => called with uobj locked
* => the page should be busy
* => the page may be busy; if it's busy, it will be unbusied
* => return value:
* newly allocated page if succeeded
*/
@ -1111,7 +1121,6 @@ uvm_loanbreak(struct vm_page *uobjpage)
KASSERT(uobj != NULL);
KASSERT(rw_write_held(uobj->vmobjlock));
KASSERT(uobjpage->flags & PG_BUSY);
/* alloc new un-owned page */
pg = uvm_pagealloc(NULL, 0, NULL, 0);
@ -1131,8 +1140,10 @@ uvm_loanbreak(struct vm_page *uobjpage)
KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
pmap_page_protect(uobjpage, VM_PROT_NONE);
/* uobj still locked */
uobjpage->flags &= ~PG_BUSY;
UVM_PAGE_OWN(uobjpage, NULL);
if ((uobjpage->flags & PG_BUSY) != 0) {
uobjpage->flags &= ~PG_BUSY;
UVM_PAGE_OWN(uobjpage, NULL);
}
/*
* if the page is no longer referenced by

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_page.h,v 1.102 2020/03/17 18:31:39 ad Exp $ */
/* $NetBSD: uvm_page.h,v 1.103 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -151,20 +151,18 @@
*
* On the ordering of fields:
*
* The fields most heavily used by the page allocator and uvmpdpol are
* clustered together at the start of the structure, so that while under
* global lock it's more likely that only one cache line for each page need
* be touched.
* The fields most heavily used during fault processing are clustered
* together at the start of the structure to reduce cache misses.
* XXX This entire thing should be shrunk to fit in one cache line.
*/
struct vm_page {
/* _LP64: first cache line */
union {
TAILQ_ENTRY(vm_page) queue; /* w: wired page queue
* or uvm_pglistalloc output */
LIST_ENTRY(vm_page) list; /* f: global free page queue */
} pageq;
TAILQ_ENTRY(vm_page) pdqueue; /* p: pagedaemon queue */
kmutex_t interlock; /* s: lock on identity */
uint32_t pqflags; /* i: pagedaemon flags */
uint32_t flags; /* o: object flags */
paddr_t phys_addr; /* o: physical address of pg */
@ -174,6 +172,10 @@ struct vm_page {
struct uvm_object *uobject; /* o,i: object */
voff_t offset; /* o: offset into object */
/* _LP64: second cache line */
kmutex_t interlock; /* s: lock on identity */
TAILQ_ENTRY(vm_page) pdqueue; /* p: pagedaemon queue */
#ifdef __HAVE_VM_PAGE_MD
struct vm_page_md mdpage; /* ?: pmap-specific data */
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pager.h,v 1.47 2020/03/22 18:32:42 ad Exp $ */
/* $NetBSD: uvm_pager.h,v 1.48 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -152,7 +152,6 @@ struct uvm_pagerops {
#define PGO_JOURNALLOCKED 0x020 /* journal is already locked [get/put] */
#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */
#define PGO_BUSYFAIL 0x080 /* fail if a page is busy [put] */
#define PGO_NOBUSY 0x100 /* don't busy returned pages (read locked) */
#define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */
#define PGO_PASTEOF 0x400 /* allow allocation of pages past EOF */
#define PGO_NOBLOCKALLOC 0x800 /* backing block allocation is not needed */

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pdpolicy.h,v 1.7 2020/02/23 15:46:43 ad Exp $ */
/* $NetBSD: uvm_pdpolicy.h,v 1.8 2020/05/17 19:38:17 ad Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@ -48,6 +48,7 @@ void uvmpdpol_pageactivate(struct vm_page *);
void uvmpdpol_pagedeactivate(struct vm_page *);
void uvmpdpol_pagedequeue(struct vm_page *);
void uvmpdpol_pageenqueue(struct vm_page *);
bool uvmpdpol_pageactivate_p(struct vm_page *);
bool uvmpdpol_pageisqueued_p(struct vm_page *);
void uvmpdpol_pagerealize(struct vm_page *);
void uvmpdpol_anfree(struct vm_anon *);

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $ */
/* $NetBSD: uvm_pdpolicy_clock.c,v 1.37 2020/05/17 19:38:17 ad Exp $ */
/* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
/*-
@ -98,7 +98,7 @@
#else /* defined(PDSIM) */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.37 2020/05/17 19:38:17 ad Exp $");
#include <sys/param.h>
#include <sys/proc.h>
@ -565,6 +565,29 @@ uvmpdpol_pageisqueued_p(struct vm_page *pg)
}
}
bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
uint32_t pqflags;
/* consider intent in preference to actual state. */
pqflags = atomic_load_relaxed(&pg->pqflags);
if ((pqflags & PQ_INTENT_SET) != 0) {
pqflags &= PQ_INTENT_MASK;
return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
} else {
/*
* TODO: Enabling this may be too much of a big hammer,
* since we do get useful information from activations.
* Think about it more and maybe come up with a heuristic
* or something.
*
* return (pqflags & PQ_ACTIVE) == 0;
*/
return true;
}
}
void
uvmpdpol_estimatepageable(int *active, int *inactive)
{

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.25 2020/04/10 18:17:56 tsutsui Exp $ */
/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.26 2020/05/17 19:38:17 ad Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@ -43,7 +43,7 @@
#else /* defined(PDSIM) */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.25 2020/04/10 18:17:56 tsutsui Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.26 2020/05/17 19:38:17 ad Exp $");
#include "opt_ddb.h"
@ -1307,6 +1307,14 @@ uvmpdpol_pageisqueued_p(struct vm_page *pg)
return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}
bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
/* For now, no heuristic, always receive activations. */
return true;
}
void
uvmpdpol_scaninit(void)
{