Start trying to reduce cache misses on vm_page during fault processing.

- Make PGO_LOCKED getpages imply PGO_NOBUSY and remove the latter.  Mark
  pages busy only when there's actually I/O to do.

- When doing COW on a uvm_object, don't mess with neighbouring pages.  In
  all likelihood they're already entered.

- Don't mess with neighbouring VAs that have existing mappings, as replacing
  those mappings with identical ones can be quite costly.

- Only enqueue pages for neighbour faults if they're not already enqueued, and
  don't activate centre pages unless uvmpdpol says it's useful (sketched below).

Also:

- Make PGO_LOCKED getpages on UAOs work more like vnodes: do gang lookup in
  the radix tree, and don't allocate new pages.

- Fix many assertion failures around faults/loans with tmpfs.
Author: ad
Date:   2020-05-17 19:38:16 +00:00
Commit: ff872804dc (parent 013f352853)

10 changed files with 267 additions and 317 deletions
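
The sketches that follow are editor-added condensations of the main changes, written against the code in the diffs below; the helper names are hypothetical and the bodies are trimmed, so treat them as illustrations rather than the committed functions.  First, the neighbour-VA handling (third bullet): uvm_fault_lower_lookup() now skips VAs that already have a mapping and only pays for pmap_update() when something was actually entered.  As the loop would read inside sys/uvm/uvm_fault.c (which defines struct uvm_faultctx and the static helper uvm_fault_lower_neighbor()):

/*
 * Hypothetical condensation of the neighbour scan in
 * uvm_fault_lower_lookup(); assumes it lives in uvm_fault.c.
 */
static void
scan_lower_neighbors(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
    struct vm_page **pages)
{
	vaddr_t currva = flt->startva;
	bool entered = false;

	for (int lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
		struct vm_page *curpg = pages[lcv];

		/* the centre page is handled by the main fault path. */
		if (curpg == NULL || curpg == PGO_DONTCARE ||
		    lcv == flt->centeridx) {
			continue;
		}
		/*
		 * don't touch VAs that already have a mapping:
		 * pmap_extract() is a cheap existence test, and replacing
		 * a mapping with an identical one can be quite costly.
		 */
		if (pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
			continue;
		}
		uvm_fault_lower_neighbor(ufi, flt, currva, curpg);
		entered = true;
	}
	/* one pmap_update() at the end, and only if a mapping was added. */
	if (entered) {
		pmap_update(ufi->orig_map->pmap);
	}
}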
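
Second, the queue handling (fourth bullet, "sketched below" above): neighbour pages are only enqueued when they aren't queued already and aren't wired, and the centre page is only re-activated when the page daemon policy says it's worth it.  The two wrapper functions here are hypothetical; the uvmpdpol_*() and uvm_page*() calls are the ones used in the uvm_fault.c hunks:

#include <sys/param.h>
#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/* neighbour page: make sure it's on a paging queue, nothing more. */
static void
fault_neighbor_enqueue(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
		uvm_pagelock(pg);
		uvm_pageenqueue(pg);
		uvm_pageunlock(pg);
	}
}

/* centre page: activate only when the pdpolicy says it's worthwhile. */
static void
fault_center_activate(struct vm_page *pg)
{

	/*
	 * avoid re-activating the page unless needed, to avoid false
	 * sharing on multiprocessor (comment taken from the diff).
	 */
	if (uvmpdpol_pageactivate_p(pg)) {
		uvm_pagelock(pg);
		uvm_pageactivate(pg);
		uvm_pageunlock(pg);
	}
}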
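
Third, the PGO_LOCKED leg of uao_get() (first bullet under "Also"): resident pages are found with a gang lookup in the radix tree via uvm_page_array, nothing is allocated, and pages are not marked PG_BUSY.  A minimal stand-alone sketch under a hypothetical name, with error handling and the *npagesp out-parameter omitted; pps[] is assumed NULL-initialized by the caller, as in the fault path:

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm.h>
#include <uvm/uvm_page_array.h>

static int
uao_get_locked_sketch(struct uvm_object *uobj, voff_t offset,
    struct vm_page **pps, int maxpages, int centeridx, int flags)
{
	struct uvm_page_array a;
	struct vm_page *ptmp;
	bool done;
	int lcv;

	uvm_page_array_init(&a);
	for (lcv = 0; lcv < maxpages; lcv++) {
		/* batched radix tree lookup: peek the next resident page. */
		ptmp = uvm_page_array_fill_and_peek(&a, uobj,
		    offset + (lcv << PAGE_SHIFT), maxpages, 0);
		if (ptmp == NULL) {
			break;
		}
		/* the lookup may skip holes; recompute the slot index. */
		lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
		if (lcv >= maxpages) {
			break;
		}
		uvm_page_array_advance(&a);
		/* busy pages need a wait or I/O: leave them to the slow path. */
		if ((ptmp->flags & PG_BUSY) != 0) {
			continue;
		}
		pps[lcv] = ptmp;
	}
	uvm_page_array_fini(&a);

	/* EBUSY tells the fault code to retry via the unlocked path. */
	if ((flags & PGO_ALLPAGES) != 0) {
		done = true;
		for (int i = 0; i < maxpages; i++) {
			done &= (pps[i] != NULL);
		}
	} else {
		done = (pps[centeridx] != NULL);
	}
	return done ? 0 : EBUSY;
}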

View File

@ -1,4 +1,4 @@
/* $NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $ */
/* $NetBSD: genfs_io.c,v 1.96 2020/05/17 19:38:16 ad Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.95 2020/03/22 18:32:41 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.96 2020/05/17 19:38:16 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -128,12 +128,12 @@ genfs_getpages(void *v)
/*
* the object must be locked. it can only be a read lock when
* processing a read fault with PGO_LOCKED | PGO_NOBUSY.
* processing a read fault with PGO_LOCKED.
*/
KASSERT(rw_lock_held(uobj->vmobjlock));
KASSERT(rw_write_held(uobj->vmobjlock) ||
((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 && !memwrite));
((flags & PGO_LOCKED) != 0 && !memwrite));
#ifdef DIAGNOSTIC
if ((flags & PGO_JOURNALLOCKED) && vp->v_mount->mnt_wapbl)
@ -237,9 +237,8 @@ startover:
#endif /* defined(DEBUG) */
nfound = uvn_findpages(uobj, origoffset, &npages,
ap->a_m, NULL,
UFP_NOWAIT | UFP_NOALLOC |
(memwrite ? UFP_NORDONLY : 0) |
((flags & PGO_NOBUSY) != 0 ? UFP_NOBUSY : 0));
UFP_NOWAIT | UFP_NOALLOC | UFP_NOBUSY |
(memwrite ? UFP_NORDONLY : 0));
KASSERT(npages == *ap->a_count);
if (nfound == 0) {
error = EBUSY;
@ -250,10 +249,6 @@ startover:
* the file behind us.
*/
if (!genfs_node_rdtrylock(vp)) {
if ((flags & PGO_NOBUSY) == 0) {
genfs_rel_pages(ap->a_m, npages);
}
/*
* restore the array.
*/

View File

@ -1,4 +1,4 @@
/* $NetBSD: nfs_bio.c,v 1.196 2020/04/23 21:47:08 ad Exp $ */
/* $NetBSD: nfs_bio.c,v 1.197 2020/05/17 19:38:16 ad Exp $ */
/*
* Copyright (c) 1989, 1993
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.196 2020/04/23 21:47:08 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.197 2020/05/17 19:38:16 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_nfs.h"
@ -1260,7 +1260,6 @@ nfs_getpages(void *v)
bool v3 = NFS_ISV3(vp);
bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
bool locked = (ap->a_flags & PGO_LOCKED) != 0;
bool nobusy = (ap->a_flags & PGO_NOBUSY);
/*
* XXX NFS wants to modify the pages below and that can't be done
@ -1348,14 +1347,10 @@ nfs_getpages(void *v)
if (!mutex_tryenter(&np->n_commitlock)) {
/*
* Since PGO_LOCKED is set, we need to unbusy
* all pages fetched by genfs_getpages() above,
* tell the caller that there are no pages
* available and put back original pgs array.
*/
if (nobusy == false)
uvm_page_unbusy(pgs, npages);
*ap->a_count = 0;
memcpy(pgs, opgs,
npages * sizeof(struct vm_pages *));

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_aobj.c,v 1.140 2020/05/15 22:27:04 ad Exp $ */
/* $NetBSD: uvm_aobj.c,v 1.141 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.140 2020/05/15 22:27:04 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.141 2020/05/17 19:38:17 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
@ -250,6 +250,8 @@ uao_find_swslot(struct uvm_object *uobj, int pageidx)
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
struct uao_swhash_elt *elt;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
/*
* if noswap flag is set, then we never return a slot
*/
@ -293,6 +295,7 @@ uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
(uintptr_t)aobj, pageidx, slot, 0);
KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
/*
* if noswap flag is set, then we can't set a non-zero slot.
@ -365,6 +368,7 @@ uao_free(struct uvm_aobj *aobj)
{
struct uvm_object *uobj = &aobj->u_obj;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_write_held(uobj->vmobjlock));
uao_dropswap_range(uobj, 0, 0);
rw_exit(uobj->vmobjlock);
@ -665,6 +669,7 @@ uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
voff_t curoff;
UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_write_held(uobj->vmobjlock));
if (flags & PGO_ALLPAGES) {
@ -808,13 +813,13 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
/*
* the object must be locked. it can only be a read lock when
* processing a read fault with PGO_LOCKED | PGO_NOBUSY.
* processing a read fault with PGO_LOCKED.
*/
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_lock_held(uobj->vmobjlock));
KASSERT(rw_write_held(uobj->vmobjlock) ||
((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 &&
(access_type & VM_PROT_WRITE) == 0));
((flags & PGO_LOCKED) != 0 && (access_type & VM_PROT_WRITE) == 0));
/*
* get number of pages
@ -827,7 +832,7 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
*/
if (flags & PGO_LOCKED) {
krw_t lktype = rw_lock_op(uobj->vmobjlock);
struct uvm_page_array a;
/*
* step 1a: get pages that are already resident. only do
@ -835,77 +840,56 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
* time through).
*/
uvm_page_array_init(&a);
done = true; /* be optimistic */
gotpages = 0; /* # of pages we got so far */
for (lcv = 0, current_offset = offset ; lcv < maxpages ;
lcv++, current_offset += PAGE_SIZE) {
/* do we care about this page? if not, skip it */
if (pps[lcv] == PGO_DONTCARE)
continue;
ptmp = uvm_pagelookup(uobj, current_offset);
/*
* if page is new, attempt to allocate the page,
* zero-fill'd. we can only do this if the caller
* holds a write lock.
*/
if (ptmp == NULL && lktype == RW_WRITER &&
uao_find_swslot(uobj,
current_offset >> PAGE_SHIFT) == 0) {
ptmp = uao_pagealloc(uobj, current_offset,
UVM_FLAG_COLORMATCH|UVM_PGA_ZERO);
if (ptmp) {
/* new page */
ptmp->flags &= ~(PG_FAKE);
uvm_pagemarkdirty(ptmp,
UVM_PAGE_STATUS_UNKNOWN);
if ((flags & PGO_NOBUSY) != 0)
ptmp->flags &= ~PG_BUSY;
goto gotpage;
}
for (lcv = 0; lcv < maxpages; lcv++) {
ptmp = uvm_page_array_fill_and_peek(&a, uobj,
offset + (lcv << PAGE_SHIFT), maxpages, 0);
if (ptmp == NULL) {
break;
}
KASSERT(ptmp->offset >= offset);
lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
if (lcv >= maxpages) {
break;
}
uvm_page_array_advance(&a);
/*
* to be useful must get a non-busy page
*/
if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
if (lcv == centeridx ||
(flags & PGO_ALLPAGES) != 0)
/* need to do a wait or I/O! */
done = false;
if ((ptmp->flags & PG_BUSY) != 0) {
continue;
}
/*
* useful page: busy/lock it and plug it in our
* result array
* useful page: plug it in our result array
*/
KASSERT(uvm_pagegetdirty(ptmp) !=
UVM_PAGE_STATUS_CLEAN);
if ((flags & PGO_NOBUSY) == 0) {
/* caller must un-busy this page */
ptmp->flags |= PG_BUSY;
UVM_PAGE_OWN(ptmp, "uao_get1");
}
gotpage:
pps[lcv] = ptmp;
gotpages++;
}
uvm_page_array_fini(&a);
/*
* step 1b: now we've either done everything needed or we need
* to unlock and do some waiting or I/O.
*/
if ((flags & PGO_ALLPAGES) != 0) {
for (int i = 0; i < maxpages; i++) {
done &= (pps[i] != NULL);
}
} else {
done = (pps[centeridx] != NULL);
}
UVMHIST_LOG(pdhist, "<- done (done=%jd)", done, 0,0,0);
*npagesp = gotpages;
if (done)
return 0;
else
return EBUSY;
return done ? 0 : EBUSY;
}
/*
@ -1117,6 +1101,8 @@ uao_dropswap(struct uvm_object *uobj, int pageidx)
{
int slot;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
slot = uao_set_swslot(uobj, pageidx, 0);
if (slot) {
uvm_swap_free(slot, 1);
@ -1340,6 +1326,7 @@ uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
int swpgonlydelta = 0;
KASSERT(UVM_OBJ_IS_AOBJ(uobj));
KASSERT(rw_write_held(uobj->vmobjlock));
if (end == 0) {

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_fault.c,v 1.226 2020/05/15 22:35:05 ad Exp $ */
/* $NetBSD: uvm_fault.c,v 1.227 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.226 2020/05/15 22:35:05 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.227 2020/05/17 19:38:17 ad Exp $");
#include "opt_uvmhist.h"
@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.226 2020/05/15 22:35:05 ad Exp $");
#include <sys/mman.h>
#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
/*
*
@ -569,11 +570,7 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
} else if (uobjpage != PGO_DONTCARE) {
/* object-backed COW */
opg = uobjpage;
if ((uobjpage->flags & PG_BUSY) != 0) {
KASSERT(rw_write_held(opg->uobject->vmobjlock));
} else {
KASSERT(rw_read_held(opg->uobject->vmobjlock));
}
KASSERT(rw_lock_held(opg->uobject->vmobjlock));
} else {
/* ZFOD */
opg = NULL;
@ -627,10 +624,6 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
}
/* unlock and fail ... */
if (uobjpage != PGO_DONTCARE &&
(uobjpage->flags & PG_BUSY) != 0) {
uvm_page_unbusy(&uobjpage, 1);
}
uvmfault_unlockall(ufi, amap, uobj);
if (!uvm_reclaimable()) {
UVMHIST_LOG(maphist, "out of VM", 0,0,0,0);
@ -655,6 +648,17 @@ uvmfault_promote(struct uvm_faultinfo *ufi,
amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon,
oanon != NULL);
/*
* from this point on am_lock won't be dropped until the page is
* entered, so it's safe to unbusy the page up front.
*
* uvm_fault_{upper,lower}_done will activate or enqueue the page.
*/
pg = anon->an_page;
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
*nanon = anon;
error = 0;
done:
@ -1088,6 +1092,17 @@ uvm_fault_check(
return EFAULT;
}
/*
* for a case 2B fault waste no time on adjacent pages because
* they are likely already entered.
*/
if (uobj != NULL && amap != NULL &&
(flt->access_type & VM_PROT_WRITE) != 0) {
/* wide fault (!narrow) */
flt->narrow = true;
}
/*
* establish range of interest based on advice from mapper
* and then clip to fit map entry. note that we only want
@ -1338,14 +1353,6 @@ uvm_fault_upper_lookup(
UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed,
(ufi->entry->object.uvm_obj && shadowed != false),0,0);
/*
* note that if we are really short of RAM we could sleep in the above
* call to pmap_enter with everything locked. bad?
*
* XXX Actually, that is bad; pmap_enter() should just fail in that
* XXX case. --thorpej
*/
return 0;
}
@ -1370,17 +1377,16 @@ uvm_fault_upper_neighbor(
KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN);
/*
* in the read-locked case, it's not possible for this to be a new
* page, therefore it's enqueued already. there wasn't a direct
* fault on the page, so avoid the cost of re-enqueuing it unless
* write-locked.
* there wasn't a direct fault on the page, so avoid the cost of
* activating it.
*/
if (flt->upper_lock_type == RW_WRITER) {
if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
}
UVMHIST_LOG(maphist,
" MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@ -1615,13 +1621,10 @@ uvm_fault_upper_promote(
default:
return error;
}
pg = anon->an_page;
KASSERT(anon->an_lock == oanon->an_lock);
/* uvm_fault_upper_done will activate or enqueue the page */
pg = anon->an_page;
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
KASSERT((pg->flags & (PG_BUSY | PG_FAKE)) == 0);
/* deref: can not drop to zero here by defn! */
KASSERT(oanon->an_ref > 1);
@ -1714,11 +1717,9 @@ uvm_fault_upper_enter(
* we just promoted.
*/
if (flt->upper_lock_type == RW_WRITER) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
}
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
/*
* No need to undo what we did; we can simply think of
@ -1768,15 +1769,11 @@ uvm_fault_upper_done(
* ... update the page queues.
*/
uvm_pagelock(pg);
if (wire_paging) {
uvm_pagelock(pg);
uvm_pagewire(pg);
} else {
uvm_pageactivate(pg);
}
uvm_pageunlock(pg);
uvm_pageunlock(pg);
if (wire_paging) {
/*
* since the now-wired page cannot be paged out,
* release its swap resources for others to use.
@ -1786,6 +1783,15 @@ uvm_fault_upper_done(
uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
uvm_anon_dropswap(anon);
} else if (uvmpdpol_pageactivate_p(pg)) {
/*
* avoid re-activating the page unless needed,
* to avoid false sharing on multiprocessor.
*/
uvm_pagelock(pg);
uvm_pageactivate(pg);
uvm_pageunlock(pg);
}
}
@ -1808,7 +1814,6 @@ uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
*/
if (__predict_true(flt->lower_lock_type == RW_WRITER)) {
KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0);
return 0;
}
@ -1827,18 +1832,6 @@ uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
}
cpu_count(CPU_COUNT_FLTUP, 1);
KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock));
/*
* finally, if a page was supplied, assert that it's not busy
* (can't be with a reader lock) and then mark it busy now that
* we have a writer lock.
*/
if (uobjpage != NULL) {
KASSERT((uobjpage->flags & PG_BUSY) == 0);
uobjpage->flags |= PG_BUSY;
UVM_PAGE_OWN(uobjpage, "upgrdlwr");
}
return 0;
}
@ -1899,17 +1892,8 @@ uvm_fault_lower(
*/
KASSERT(amap == NULL ||
rw_lock_op(amap->am_lock) == flt->upper_lock_type);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
KASSERTMSG(uobjpage == NULL ||
(uobjpage->flags & PG_BUSY) != 0,
"page %p should be busy", uobjpage);
} else {
KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
KASSERTMSG(uobjpage == NULL ||
(uobjpage->flags & PG_BUSY) == 0,
"page %p should not be busy", uobjpage);
}
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* note that uobjpage can not be PGO_DONTCARE at this point. we now
@ -1952,13 +1936,8 @@ uvm_fault_lower(
*/
KASSERT(amap == NULL ||
rw_lock_op(amap->am_lock) == flt->upper_lock_type);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
} else {
KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
}
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* notes:
@ -1966,10 +1945,12 @@ uvm_fault_lower(
* - at this point uobjpage can not be PG_RELEASED (since we checked
* for it above)
* - at this point uobjpage could be waited on (handle later)
* - uobjpage can be from a different object if tmpfs (vnode vs UAO)
*/
KASSERT(uobjpage != NULL);
KASSERT(uobj == NULL || uobj == uobjpage->uobject);
KASSERT(uobj == NULL ||
uobjpage->uobject->vmobjlock == uobj->vmobjlock);
KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) ||
uvm_pagegetdirty(uobjpage) == UVM_PAGE_STATUS_CLEAN);
@ -1997,16 +1978,13 @@ uvm_fault_lower_lookup(
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
int lcv, gotpages;
vaddr_t currva;
bool entered;
UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist);
rw_enter(uobj->vmobjlock, flt->lower_lock_type);
/*
* Locked: maps(read), amap(if there), uobj
*
* if we have a read lock on the object, do a PGO_NOBUSY get, which
* will return us pages with PG_BUSY clear. if a write lock is held
* pages will be returned with PG_BUSY set.
*/
cpu_count(CPU_COUNT_FLTLGET, 1);
@ -2015,7 +1993,7 @@ uvm_fault_lower_lookup(
ufi->entry->offset + flt->startva - ufi->entry->start,
pages, &gotpages, flt->centeridx,
flt->access_type & MASK(ufi->entry), ufi->entry->advice,
PGO_LOCKED | (flt->lower_lock_type == RW_WRITER ? 0 : PGO_NOBUSY));
PGO_LOCKED);
KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
@ -2028,6 +2006,7 @@ uvm_fault_lower_lookup(
return;
}
entered = false;
currva = flt->startva;
for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
struct vm_page *curpg;
@ -2036,32 +2015,31 @@ uvm_fault_lower_lookup(
if (curpg == NULL || curpg == PGO_DONTCARE) {
continue;
}
KASSERT(curpg->uobject == uobj);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(rw_write_held(uobj->vmobjlock));
KASSERTMSG((curpg->flags & PG_BUSY) != 0,
"page %p should be busy", curpg);
} else {
KASSERT(rw_read_held(uobj->vmobjlock));
KASSERTMSG((curpg->flags & PG_BUSY) == 0,
"page %p should not be busy", curpg);
}
/*
* if center page is resident and not PG_BUSY|PG_RELEASED
* and !PGO_NOBUSY, then pgo_get made it PG_BUSY for us and
* gave us a handle to it.
* in the case of tmpfs, the pages might be from a different
* uvm_object. just make sure that they have the same lock.
*/
KASSERT(curpg->uobject->vmobjlock == uobj->vmobjlock);
KASSERT((curpg->flags & PG_BUSY) == 0);
/*
* leave the centre page for later. don't screw with
* existing mappings (needless & expensive).
*/
if (lcv == flt->centeridx) {
UVMHIST_LOG(maphist, " got uobjpage (%#jx) "
"with locked get", (uintptr_t)curpg, 0, 0, 0);
} else {
} else if (!pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
uvm_fault_lower_neighbor(ufi, flt, currva, curpg);
entered = true;
}
}
pmap_update(ufi->orig_map->pmap);
if (entered) {
pmap_update(ufi->orig_map->pmap);
}
}
/*
@ -2082,20 +2060,17 @@ uvm_fault_lower_neighbor(
* calling pgo_get with PGO_LOCKED returns us pages which
* are neither busy nor released, so we don't need to check
* for this. we can just directly enter the pages.
*/
/*
* in the read-locked case, it's not possible for this to be a new
* page. it must be cached with the object and enqueued already.
*
* there wasn't a direct fault on the page, so avoid the cost of
* re-enqueuing it.
* activating it.
*/
if (flt->lower_lock_type == RW_WRITER) {
if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pageunlock(pg);
}
UVMHIST_LOG(maphist,
" MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@ -2112,20 +2087,7 @@ uvm_fault_lower_neighbor(
KASSERT((pg->flags & PG_RELEASED) == 0);
KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
/*
* if a write lock was held on the object, the pages have been
* busied. unbusy them now, as we are about to enter and then
* forget about them.
*/
if (flt->lower_lock_type == RW_WRITER) {
KASSERT((pg->flags & PG_BUSY) != 0);
pg->flags &= ~(PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
} else {
KASSERT((pg->flags & PG_BUSY) == 0);
}
KASSERT((pg->flags & PG_BUSY) == 0);
KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type);
const vm_prot_t mapprot =
@ -2253,34 +2215,38 @@ uvm_fault_lower_io(
}
/*
* didn't get the lock? release the page and retry.
* unbusy/release the page.
*/
if ((pg->flags & PG_RELEASED) == 0) {
pg->flags &= ~PG_BUSY;
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
UVM_PAGE_OWN(pg, NULL);
} else {
cpu_count(CPU_COUNT_FLTPGRELE, 1);
uvm_pagefree(pg);
}
/*
* didn't get the lock? retry.
*/
if (locked == false) {
UVMHIST_LOG(maphist,
" wasn't able to relock after fault: retry",
0,0,0,0);
if ((pg->flags & PG_RELEASED) == 0) {
pg->flags &= ~PG_BUSY;
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
UVM_PAGE_OWN(pg, NULL);
} else {
cpu_count(CPU_COUNT_FLTPGRELE, 1);
uvm_pagefree(pg);
}
rw_exit(uobj->vmobjlock);
return ERESTART;
}
/*
* we have the data in pg which is busy and
* not released. we are holding object lock (so the page
* we have the data in pg. we are holding object lock (so the page
* can't be released on us).
*/
/* locked: maps(read), amap(if !null), uobj, pg */
/* locked: maps(read), amap(if !null), uobj */
*ruobj = uobj;
*ruobjpage = pg;
@ -2328,12 +2294,7 @@ uvm_fault_lower_direct(
uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage);
}
KASSERT(pg == uobjpage);
if (flt->lower_lock_type == RW_READER) {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
} else {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
}
KASSERT((pg->flags & PG_BUSY) == 0);
return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
}
@ -2375,16 +2336,6 @@ uvm_fault_lower_direct_loan(
pg = uvm_loanbreak(uobjpage);
if (pg == NULL) {
/*
* drop ownership of page, it can't be released
*/
uvm_pagelock(uobjpage);
uvm_pagewakeup(uobjpage);
uvm_pageunlock(uobjpage);
uobjpage->flags &= ~PG_BUSY;
UVM_PAGE_OWN(uobjpage, NULL);
uvmfault_unlockall(ufi, amap, uobj);
UVMHIST_LOG(maphist,
" out of RAM breaking loan, waiting",
@ -2395,6 +2346,17 @@ uvm_fault_lower_direct_loan(
}
*rpg = pg;
*ruobjpage = pg;
/*
* drop ownership of page while still holding object lock,
* which won't be dropped until the page is entered.
*/
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
pg->flags &= ~PG_BUSY;
UVM_PAGE_OWN(pg, NULL);
}
return 0;
}
@ -2426,6 +2388,8 @@ uvm_fault_lower_promote(
return error;
}
KASSERT(rw_write_held(amap->am_lock));
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
/*
* If we are going to promote the data to an anon we
@ -2446,11 +2410,6 @@ uvm_fault_lower_promote(
/*
* Fill in the data.
*/
if (flt->lower_lock_type == RW_READER) {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0);
} else {
KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
}
if (uobjpage != PGO_DONTCARE) {
cpu_count(CPU_COUNT_FLT_PRCOPY, 1);
@ -2467,19 +2426,6 @@ uvm_fault_lower_promote(
*/
}
/*
* dispose of uobjpage. it can't be PG_RELEASED
* since we still hold the object lock.
*/
if ((uobjpage->flags & PG_BUSY) != 0) {
uobjpage->flags &= ~PG_BUSY;
uvm_pagelock(uobjpage);
uvm_pagewakeup(uobjpage);
uvm_pageunlock(uobjpage);
UVM_PAGE_OWN(uobjpage, NULL);
}
UVMHIST_LOG(maphist,
" promote uobjpage %#jx to anon/page %#jx/%#jx",
(uintptr_t)uobjpage, (uintptr_t)anon, (uintptr_t)pg, 0);
@ -2525,18 +2471,20 @@ uvm_fault_lower_enter(
*
* Note: pg is either the uobjpage or the new page in the new anon.
*/
KASSERT(amap == NULL ||
rw_lock_op(amap->am_lock) == flt->upper_lock_type);
KASSERT(uobj == NULL ||
rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
if (flt->lower_lock_type == RW_WRITER) {
KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
KASSERTMSG((pg->flags & PG_BUSY) != 0,
"page %p should be busy", pg);
} else {
KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
KASSERTMSG(anon != NULL || (pg->flags & PG_BUSY) == 0,
"page %p should not be busy", pg);
}
/*
* note that pg can't be PG_RELEASED or PG_BUSY since we did
* not drop the object lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
KASSERT((pg->flags & PG_BUSY) == 0);
/*
* all resources are present. we can now map it in and free our
@ -2573,23 +2521,11 @@ uvm_fault_lower_enter(
* we just promoted the page.
*/
if (anon != NULL || flt->lower_lock_type == RW_WRITER) {
if (anon != NULL) {
uvm_pagelock(pg);
uvm_pageenqueue(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
} else {
KASSERT((pg->flags & PG_BUSY) == 0);
}
/*
* note that pg can't be PG_RELEASED since we did not drop
* the object lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
if ((pg->flags & PG_BUSY) != 0) {
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
}
uvmfault_unlockall(ufi, amap, uobj);
@ -2606,20 +2542,6 @@ uvm_fault_lower_enter(
}
uvm_fault_lower_done(ufi, flt, uobj, pg);
/*
* note that pg can't be PG_RELEASED since we did not drop the object
* lock since the last time we checked.
*/
KASSERT((pg->flags & PG_RELEASED) == 0);
if ((pg->flags & PG_BUSY) != 0) {
uvm_pagelock(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
}
pmap_update(ufi->orig_map->pmap);
uvmfault_unlockall(ufi, amap, uobj);
@ -2636,13 +2558,13 @@ uvm_fault_lower_done(
struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
struct uvm_object *uobj, struct vm_page *pg)
{
bool dropswap = false;
UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist);
uvm_pagelock(pg);
if (flt->wire_paging) {
uvm_pagelock(pg);
uvm_pagewire(pg);
uvm_pageunlock(pg);
if (pg->flags & PG_AOBJ) {
/*
@ -2650,19 +2572,26 @@ uvm_fault_lower_done(
* release its swap resources for others to use.
* since an aobj page with no swap cannot be clean,
* mark it dirty now.
*
* use pg->uobject here. if the page is from a
* tmpfs vnode, the pages are backed by its UAO and
* not the vnode.
*/
KASSERT(uobj != NULL);
KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
dropswap = true;
uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
}
} else {
uvm_pageactivate(pg);
}
uvm_pageunlock(pg);
} else if (uvmpdpol_pageactivate_p(pg)) {
/*
* avoid re-activating the page unless needed,
* to avoid false sharing on multiprocessor.
*/
if (dropswap) {
uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
uvm_pagelock(pg);
uvm_pageactivate(pg);
uvm_pageunlock(pg);
}
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $ */
/* $NetBSD: uvm_loan.c,v 1.101 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.100 2020/03/22 18:32:42 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.101 2020/05/17 19:38:17 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -109,7 +109,7 @@ static int uvm_loanuobj(struct uvm_faultinfo *, void ***,
static int uvm_loanzero(struct uvm_faultinfo *, void ***, int);
static void uvm_unloananon(struct vm_anon **, int);
static void uvm_unloanpage(struct vm_page **, int);
static int uvm_loanpage(struct vm_page **, int);
static int uvm_loanpage(struct vm_page **, int, bool);
/*
@ -442,12 +442,11 @@ uvm_loananon(struct uvm_faultinfo *ufi, void ***output, int flags,
*
* => pages should be object-owned and the object should be locked.
* => in the case of error, the object might be unlocked and relocked.
* => caller should busy the pages beforehand.
* => pages will be unbusied.
* => pages will be unbusied (if busied is true).
* => fail with EBUSY if meet a wired page.
*/
static int
uvm_loanpage(struct vm_page **pgpp, int npages)
uvm_loanpage(struct vm_page **pgpp, int npages, bool busied)
{
int i;
int error = 0;
@ -461,7 +460,7 @@ uvm_loanpage(struct vm_page **pgpp, int npages)
KASSERT(pg->uobject == pgpp[0]->uobject);
KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
KASSERT(rw_write_held(pg->uobject->vmobjlock));
KASSERT(pg->flags & PG_BUSY);
KASSERT(busied == ((pg->flags & PG_BUSY) != 0));
if (pg->wire_count > 0) {
UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg,
@ -479,7 +478,9 @@ uvm_loanpage(struct vm_page **pgpp, int npages)
uvm_pageunlock(pg);
}
uvm_page_unbusy(pgpp, npages);
if (busied) {
uvm_page_unbusy(pgpp, npages);
}
if (error) {
/*
@ -553,7 +554,7 @@ reget:
if (slock) {
KASSERT(npendloan > 0);
error = uvm_loanpage(pgpp - npendloan,
npendloan);
npendloan, true);
rw_exit(slock);
if (error)
goto fail;
@ -587,7 +588,7 @@ reget:
}
KASSERT(slock != NULL);
KASSERT(npendloan > 0);
error = uvm_loanpage(pgpp - npendloan, npendloan);
error = uvm_loanpage(pgpp - npendloan, npendloan, true);
rw_exit(slock);
if (error)
goto fail;
@ -702,36 +703,45 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, void ***output, int flags, vaddr_t va)
}
/*
* didn't get the lock? release the page and retry.
* unbusy the page.
*/
if (locked == false) {
if (pg->flags & PG_RELEASED) {
uvm_pagefree(pg);
rw_exit(uobj->vmobjlock);
return (0);
}
if ((pg->flags & PG_RELEASED) == 0) {
uvm_pagelock(pg);
uvm_pageactivate(pg);
uvm_pagewakeup(pg);
uvm_pageunlock(pg);
pg->flags &= ~PG_BUSY;
UVM_PAGE_OWN(pg, NULL);
}
/*
* didn't get the lock? release the page and retry.
*/
if (locked == false) {
if (pg->flags & PG_RELEASED) {
uvm_pagefree(pg);
}
rw_exit(uobj->vmobjlock);
return (0);
}
}
KASSERT(uobj == pg->uobject);
/*
* for tmpfs vnodes, the page will be from a UAO rather than
* the vnode. just check the locks match.
*/
KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
/*
* at this point we have the page we want ("pg") marked PG_BUSY for us
* and we have all data structures locked. do the loanout. page can
* not be PG_RELEASED (we caught this above).
* at this point we have the page we want ("pg") and we have
* all data structures locked. do the loanout. page can not
* be PG_RELEASED (we caught this above).
*/
if ((flags & UVM_LOAN_TOANON) == 0) {
if (uvm_loanpage(&pg, 1)) {
if (uvm_loanpage(&pg, 1, false)) {
uvmfault_unlockall(ufi, amap, uobj);
return (-1);
}
@ -1099,7 +1109,7 @@ uvm_loan_init(void)
* uvm_loanbreak: break loan on a uobj page
*
* => called with uobj locked
* => the page should be busy
* => the page may be busy; if it's busy, it will be unbusied
* => return value:
* newly allocated page if succeeded
*/
@ -1111,7 +1121,6 @@ uvm_loanbreak(struct vm_page *uobjpage)
KASSERT(uobj != NULL);
KASSERT(rw_write_held(uobj->vmobjlock));
KASSERT(uobjpage->flags & PG_BUSY);
/* alloc new un-owned page */
pg = uvm_pagealloc(NULL, 0, NULL, 0);
@ -1131,8 +1140,10 @@ uvm_loanbreak(struct vm_page *uobjpage)
KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);
pmap_page_protect(uobjpage, VM_PROT_NONE);
/* uobj still locked */
uobjpage->flags &= ~PG_BUSY;
UVM_PAGE_OWN(uobjpage, NULL);
if ((uobjpage->flags & PG_BUSY) != 0) {
uobjpage->flags &= ~PG_BUSY;
UVM_PAGE_OWN(uobjpage, NULL);
}
/*
* if the page is no longer referenced by

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_page.h,v 1.102 2020/03/17 18:31:39 ad Exp $ */
/* $NetBSD: uvm_page.h,v 1.103 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -151,20 +151,18 @@
*
* On the ordering of fields:
*
* The fields most heavily used by the page allocator and uvmpdpol are
* clustered together at the start of the structure, so that while under
* global lock it's more likely that only one cache line for each page need
* be touched.
* The fields most heavily used during fault processing are clustered
* together at the start of the structure to reduce cache misses.
* XXX This entire thing should be shrunk to fit in one cache line.
*/
struct vm_page {
/* _LP64: first cache line */
union {
TAILQ_ENTRY(vm_page) queue; /* w: wired page queue
* or uvm_pglistalloc output */
LIST_ENTRY(vm_page) list; /* f: global free page queue */
} pageq;
TAILQ_ENTRY(vm_page) pdqueue; /* p: pagedaemon queue */
kmutex_t interlock; /* s: lock on identity */
uint32_t pqflags; /* i: pagedaemon flags */
uint32_t flags; /* o: object flags */
paddr_t phys_addr; /* o: physical address of pg */
@ -174,6 +172,10 @@ struct vm_page {
struct uvm_object *uobject; /* o,i: object */
voff_t offset; /* o: offset into object */
/* _LP64: second cache line */
kmutex_t interlock; /* s: lock on identity */
TAILQ_ENTRY(vm_page) pdqueue; /* p: pagedaemon queue */
#ifdef __HAVE_VM_PAGE_MD
struct vm_page_md mdpage; /* ?: pmap-specific data */
#endif

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pager.h,v 1.47 2020/03/22 18:32:42 ad Exp $ */
/* $NetBSD: uvm_pager.h,v 1.48 2020/05/17 19:38:17 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@ -152,7 +152,6 @@ struct uvm_pagerops {
#define PGO_JOURNALLOCKED 0x020 /* journal is already locked [get/put] */
#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */
#define PGO_BUSYFAIL 0x080 /* fail if a page is busy [put] */
#define PGO_NOBUSY 0x100 /* don't busy returned pages (read locked) */
#define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */
#define PGO_PASTEOF 0x400 /* allow allocation of pages past EOF */
#define PGO_NOBLOCKALLOC 0x800 /* backing block allocation is not needed */

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pdpolicy.h,v 1.7 2020/02/23 15:46:43 ad Exp $ */
/* $NetBSD: uvm_pdpolicy.h,v 1.8 2020/05/17 19:38:17 ad Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@ -48,6 +48,7 @@ void uvmpdpol_pageactivate(struct vm_page *);
void uvmpdpol_pagedeactivate(struct vm_page *);
void uvmpdpol_pagedequeue(struct vm_page *);
void uvmpdpol_pageenqueue(struct vm_page *);
bool uvmpdpol_pageactivate_p(struct vm_page *);
bool uvmpdpol_pageisqueued_p(struct vm_page *);
void uvmpdpol_pagerealize(struct vm_page *);
void uvmpdpol_anfree(struct vm_anon *);

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $ */
/* $NetBSD: uvm_pdpolicy_clock.c,v 1.37 2020/05/17 19:38:17 ad Exp $ */
/* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
/*-
@ -98,7 +98,7 @@
#else /* defined(PDSIM) */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.36 2020/04/02 16:29:30 maxv Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.37 2020/05/17 19:38:17 ad Exp $");
#include <sys/param.h>
#include <sys/proc.h>
@ -565,6 +565,29 @@ uvmpdpol_pageisqueued_p(struct vm_page *pg)
}
}
bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
uint32_t pqflags;
/* consider intent in preference to actual state. */
pqflags = atomic_load_relaxed(&pg->pqflags);
if ((pqflags & PQ_INTENT_SET) != 0) {
pqflags &= PQ_INTENT_MASK;
return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
} else {
/*
* TODO: Enabling this may be too much of a big hammer,
* since we do get useful information from activations.
* Think about it more and maybe come up with a heuristic
* or something.
*
* return (pqflags & PQ_ACTIVE) == 0;
*/
return true;
}
}
void
uvmpdpol_estimatepageable(int *active, int *inactive)
{

View File

@ -1,4 +1,4 @@
/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.25 2020/04/10 18:17:56 tsutsui Exp $ */
/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.26 2020/05/17 19:38:17 ad Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@ -43,7 +43,7 @@
#else /* defined(PDSIM) */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.25 2020/04/10 18:17:56 tsutsui Exp $");
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.26 2020/05/17 19:38:17 ad Exp $");
#include "opt_ddb.h"
@ -1307,6 +1307,14 @@ uvmpdpol_pageisqueued_p(struct vm_page *pg)
return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}
bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
/* For now, no heuristic, always receive activations. */
return true;
}
void
uvmpdpol_scaninit(void)
{