NetBSD/sys/uvm/uvm_loan.c

1005 lines
24 KiB
C
Raw Normal View History

/* $NetBSD: uvm_loan.c,v 1.42 2003/05/03 17:54:32 yamt Exp $ */
/*
*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Charles D. Cranor and
* Washington University.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
*/
/*
* uvm_loan.c: page loanout handler
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.42 2003/05/03 17:54:32 yamt Exp $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <uvm/uvm.h>
/*
* "loaned" pages are pages which are (read-only, copy-on-write) loaned
* from the VM system to other parts of the kernel. this allows page
* copying to be avoided (e.g. you can loan pages from objs/anons to
* the mbuf system).
*
* there are 3 types of loans possible:
* O->K uvm_object page to wired kernel page (e.g. mbuf data area)
* A->K anon page to wired kernel page (e.g. mbuf data area)
* O->A uvm_object to anon loan (e.g. vnode page to an anon)
* note that it is possible to have an O page loaned to both an A and K
* at the same time.
*
* loans are tracked by pg->loan_count. an O->A page will have both
* a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort
* of page is considered "owned" by the uvm_object (not the anon).
*
* each loan of a page to the kernel bumps the pg->wire_count. the
* kernel mappings for these pages will be read-only and wired. since
* the page will also be wired, it will not be a candidate for pageout,
* and thus will never be pmap_page_protect()'d with VM_PROT_NONE. a
* write fault in the kernel to one of these pages will not cause
* copy-on-write. instead, the page fault is considered fatal. this
* is because the kernel mapping will have no way to look up the
* object/anon which the page is owned by. this is a good side-effect,
* since a kernel write to a loaned page is an error.
*
* owners that want to free their pages and discover that they are
* loaned out simply "disown" them (the page becomes an orphan). these
* pages should be freed when the last loan is dropped. in some cases
* an anon may "adopt" an orphaned page.
*
* locking: to read pg->loan_count either the owner or the page queues
* must be locked. to modify pg->loan_count, both the owner of the page
* and the PQs must be locked. pg->flags is (as always) locked by
* the owner of the page.
*
* note that locking from the "loaned" side is tricky since the object
* getting the loaned page has no reference to the page's owner and thus
* the owner could "die" at any time. in order to prevent the owner
* from dying the page queues should be locked. this forces us to sometimes
* use "try" locking.
*
* loans are typically broken by the following events:
* 1. user-level write fault to a loaned page
* 2. pageout of clean+inactive O->A loaned page
* 3. owner frees page (e.g. pager flush)
*
* note that loaning a page causes all mappings of the page to become
* read-only (via pmap_page_protect). this could have an unexpected
* effect on normal "wired" pages if one is not careful (XXX).
*/
/*
 * local prototypes
 */

/* per-page loan helpers, dispatched from uvm_loanentry() */
static int	uvm_loanentry __P((struct uvm_faultinfo *, void ***, int));
static int	uvm_loananon __P((struct uvm_faultinfo *, void ***, int,
		    struct vm_anon *));
static int	uvm_loanuobj __P((struct uvm_faultinfo *, void ***, int,
		    vaddr_t));
static int	uvm_loanzero __P((struct uvm_faultinfo *, void ***, int));

/* loan teardown helpers */
static void	uvm_unloananon __P((struct vm_anon **, int));
static void	uvm_unloanpage __P((struct vm_page **, int));
/*
* inlines
*/
/*
* uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
*
* => "ufi" is the result of a successful map lookup (meaning that
* on entry the map is locked by the caller)
* => we may unlock and then relock the map if needed (for I/O)
* => we put our output result in "output"
* => we always return with the map unlocked
* => possible return values:
* -1 == error, map is unlocked
* 0 == map relock error (try again!), map is unlocked
* >0 == number of pages we loaned, map is unlocked
*/
1998-03-09 03:58:55 +03:00
static __inline int
uvm_loanentry(ufi, output, flags)
struct uvm_faultinfo *ufi;
void ***output;
int flags;
{
vaddr_t curaddr = ufi->orig_rvaddr;
vsize_t togo = ufi->size;
1998-03-09 03:58:55 +03:00
struct vm_aref *aref = &ufi->entry->aref;
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
struct vm_anon *anon;
int rv, result = 0;
/*
* lock us the rest of the way down (we unlock before return)
1998-03-09 03:58:55 +03:00
*/
if (aref->ar_amap)
amap_lock(aref->ar_amap);
1998-03-09 03:58:55 +03:00
if (uobj)
simple_lock(&uobj->vmobjlock);
/*
* loop until done
*/
while (togo) {
/*
* find the page we want. check the anon layer first.
*/
if (aref->ar_amap) {
anon = amap_lookup(aref, curaddr - ufi->entry->start);
} else {
anon = NULL;
}
/* locked: map, amap, uobj */
1998-03-09 03:58:55 +03:00
if (anon) {
rv = uvm_loananon(ufi, output, flags, anon);
} else if (uobj) {
rv = uvm_loanuobj(ufi, output, flags, curaddr);
} else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
rv = uvm_loanzero(ufi, output, flags);
} else {
rv = -1;
1998-03-09 03:58:55 +03:00
}
/* locked: if (rv > 0) => map, amap, uobj [o.w. unlocked] */
1998-03-09 03:58:55 +03:00
/* total failure */
if (rv < 0)
return (-1);
1998-03-09 03:58:55 +03:00
/* relock failed, need to do another lookup */
if (rv == 0)
return (result);
1998-03-09 03:58:55 +03:00
/*
* got it... advance to next page
*/
1998-03-09 03:58:55 +03:00
result++;
togo -= PAGE_SIZE;
curaddr += PAGE_SIZE;
}
/*
* unlock what we locked, unlock the maps and return
1998-03-09 03:58:55 +03:00
*/
if (aref->ar_amap)
amap_unlock(aref->ar_amap);
if (uobj)
simple_unlock(&uobj->vmobjlock);
uvmfault_unlockmaps(ufi, FALSE);
return (result);
}
/*
* normal functions
*/
/*
* uvm_loan: loan pages in a map out to anons or to the kernel
2001-05-25 08:06:11 +04:00
*
* => map should be unlocked
* => start and len should be multiples of PAGE_SIZE
* => result is either an array of anon's or vm_pages (depending on flags)
* => flag values: UVM_LOAN_TOANON - loan to anons
* UVM_LOAN_TOPAGE - loan to wired kernel page
* one and only one of these flags must be set!
* => returns 0 (success), or an appropriate error number
*/
1998-03-09 03:58:55 +03:00
int
uvm_loan(map, start, len, v, flags)
1998-03-09 03:58:55 +03:00
struct vm_map *map;
vaddr_t start;
vsize_t len;
void *v;
1998-03-09 03:58:55 +03:00
int flags;
{
1998-03-09 03:58:55 +03:00
struct uvm_faultinfo ufi;
void **result, **output;
int rv, error;
1999-06-03 04:05:45 +04:00
1998-03-09 03:58:55 +03:00
/*
* ensure that one and only one of the flags is set
*/
KASSERT(((flags & UVM_LOAN_TOANON) == 0) ^
((flags & UVM_LOAN_TOPAGE) == 0));
KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
1998-03-09 03:58:55 +03:00
/*
* "output" is a pointer to the current place to put the loaned page.
1998-03-09 03:58:55 +03:00
*/
result = v;
1998-03-09 03:58:55 +03:00
output = &result[0]; /* start at the beginning ... */
/*
* while we've got pages to do
*/
while (len > 0) {
/*
* fill in params for a call to uvmfault_lookup
*/
ufi.orig_map = map;
ufi.orig_rvaddr = start;
ufi.orig_size = len;
2001-05-25 08:06:11 +04:00
1998-03-09 03:58:55 +03:00
/*
* do the lookup, the only time this will fail is if we hit on
* an unmapped region (an error)
*/
if (!uvmfault_lookup(&ufi, FALSE)) {
error = ENOENT;
1998-03-09 03:58:55 +03:00
goto fail;
}
1998-03-09 03:58:55 +03:00
/*
* map now locked. now do the loanout...
1998-03-09 03:58:55 +03:00
*/
1998-03-09 03:58:55 +03:00
rv = uvm_loanentry(&ufi, &output, flags);
if (rv < 0) {
/* all unlocked due to error */
error = EINVAL;
1998-03-09 03:58:55 +03:00
goto fail;
}
1998-03-09 03:58:55 +03:00
/*
* done! the map is unlocked. advance, if possible.
*
* XXXCDC: could be recoded to hold the map lock with
* smarter code (but it only happens on map entry
* boundaries, so it isn't that bad).
1998-03-09 03:58:55 +03:00
*/
if (rv) {
rv <<= PAGE_SHIFT;
len -= rv;
start += rv;
}
1998-03-09 03:58:55 +03:00
}
return 0;
fail:
1998-03-09 03:58:55 +03:00
/*
* failed to complete loans. drop any loans and return failure code.
* map is already unlocked.
1998-03-09 03:58:55 +03:00
*/
1998-03-09 03:58:55 +03:00
if (output - result) {
if (flags & UVM_LOAN_TOANON) {
1998-03-09 03:58:55 +03:00
uvm_unloananon((struct vm_anon **)result,
output - result);
} else {
1998-03-09 03:58:55 +03:00
uvm_unloanpage((struct vm_page **)result,
output - result);
}
1998-03-09 03:58:55 +03:00
}
return (error);
}
/*
* uvm_loananon: loan a page from an anon out
2001-05-25 08:06:11 +04:00
*
* => called with map, amap, uobj locked
* => return value:
* -1 = fatal error, everything is unlocked, abort.
* 0 = lookup in ufi went stale, everything unlocked, relookup and
* try again
* 1 = got it, everything still locked
*/
1998-03-09 03:58:55 +03:00
int
uvm_loananon(ufi, output, flags, anon)
struct uvm_faultinfo *ufi;
void ***output;
int flags;
struct vm_anon *anon;
{
1998-03-09 03:58:55 +03:00
struct vm_page *pg;
int error;
1998-03-09 03:58:55 +03:00
/*
* if we are loaning to "another" anon then it is easy, we just
1998-03-09 03:58:55 +03:00
* bump the reference count on the current anon and return a
* pointer to it (it becomes copy-on-write shared).
1998-03-09 03:58:55 +03:00
*/
1998-03-09 03:58:55 +03:00
if (flags & UVM_LOAN_TOANON) {
simple_lock(&anon->an_lock);
pg = anon->u.an_page;
if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) {
pmap_page_protect(pg, VM_PROT_READ);
}
1998-03-09 03:58:55 +03:00
anon->an_ref++;
**output = anon;
(*output)++;
1998-03-09 03:58:55 +03:00
simple_unlock(&anon->an_lock);
return (1);
1998-03-09 03:58:55 +03:00
}
/*
* we are loaning to a kernel-page. we need to get the page
* resident so we can wire it. uvmfault_anonget will handle
* this for us.
*/
simple_lock(&anon->an_lock);
error = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);
1998-03-09 03:58:55 +03:00
/*
* if we were unable to get the anon, then uvmfault_anonget has
* unlocked everything and returned an error code.
*/
if (error) {
1998-03-09 03:58:55 +03:00
/* need to refault (i.e. refresh our lookup) ? */
if (error == ERESTART) {
return (0);
}
1998-03-09 03:58:55 +03:00
/* "try again"? sleep a bit and retry ... */
if (error == EAGAIN) {
tsleep(&lbolt, PVM, "loanagain", 0);
return (0);
1998-03-09 03:58:55 +03:00
}
/* otherwise flag it as an error */
return (-1);
1998-03-09 03:58:55 +03:00
}
/*
* we have the page and its owner locked: do the loan now.
*/
pg = anon->u.an_page;
uvm_lock_pageq();
KASSERT(pg->wire_count == 0);
if (pg->loan_count == 0) {
pmap_page_protect(pg, VM_PROT_READ);
}
1998-03-09 03:58:55 +03:00
pg->loan_count++;
uvm_pagedequeue(pg);
1998-03-09 03:58:55 +03:00
uvm_unlock_pageq();
**output = pg;
(*output)++;
1998-03-09 03:58:55 +03:00
/* unlock anon and return success */
if (pg->uobject) /* XXXCDC: what if this is our uobj? bad */
1998-03-09 03:58:55 +03:00
simple_unlock(&pg->uobject->vmobjlock);
simple_unlock(&anon->an_lock);
return (1);
}
/*
 * uvm_loanuobjpages: loan pages from a uobj out (O->K)
 *
 * => called with uobj locked.
 * => caller should own the pages (each must be PG_BUSY and neither
 *	PG_RELEASED nor PG_PAGEOUT).
 * => every page is un-busied (waking any waiters) once its loan is
 *	recorded.
 */

void
uvm_loanuobjpages(pgpp, npages)
	struct vm_page **pgpp;
	int npages;
{
	struct vm_page **cur, **end;
	struct vm_page *pg;

	end = pgpp + npages;
	for (cur = pgpp; cur < end; cur++) {
		pg = *cur;

		KASSERT(pg->uobject != NULL);
		KASSERT(!(pg->flags & (PG_RELEASED|PG_PAGEOUT)));
		LOCK_ASSERT(simple_lock_held(&pg->uobject->vmobjlock));
		KASSERT(pg->flags & PG_BUSY);

		/*
		 * record the loan under the page queue lock.  the first
		 * loan of a page makes all its mappings read-only.
		 */

		uvm_lock_pageq();
		if (pg->loan_count == 0)
			pmap_page_protect(pg, VM_PROT_READ);
		pg->loan_count++;
		uvm_pagedequeue(pg);
		uvm_unlock_pageq();

		/*
		 * give up our busy hold, waking anybody waiting on it.
		 */

		if (pg->flags & PG_WANTED)
			wakeup(pg);
		pg->flags &= ~(PG_WANTED|PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
	}
}
/*
* uvm_loanuobj: loan a page from a uobj out
*
* => called with map, amap, uobj locked
* => return value:
* -1 = fatal error, everything is unlocked, abort.
* 0 = lookup in ufi went stale, everything unlocked, relookup and
* try again
* 1 = got it, everything still locked
*/
static int
1998-03-09 03:58:55 +03:00
uvm_loanuobj(ufi, output, flags, va)
struct uvm_faultinfo *ufi;
void ***output;
int flags;
vaddr_t va;
{
1998-03-09 03:58:55 +03:00
struct vm_amap *amap = ufi->entry->aref.ar_amap;
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
struct vm_page *pg;
struct vm_anon *anon;
int error, npages;
1998-03-09 03:58:55 +03:00
boolean_t locked;
/*
* first we must make sure the page is resident.
*
* XXXCDC: duplicate code with uvm_fault().
*/
if (uobj->pgops->pgo_get) { /* try locked pgo_get */
1998-03-09 03:58:55 +03:00
npages = 1;
pg = NULL;
error = (*uobj->pgops->pgo_get)(uobj,
va - ufi->entry->start + ufi->entry->offset,
1998-03-09 03:58:55 +03:00
&pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
} else {
error = EIO; /* must have pgo_get op */
1998-03-09 03:58:55 +03:00
}
/*
* check the result of the locked pgo_get. if there is a problem,
* then we fail the loan.
*/
if (error && error != EBUSY) {
1998-03-09 03:58:55 +03:00
uvmfault_unlockall(ufi, amap, uobj, NULL);
return (-1);
1998-03-09 03:58:55 +03:00
}
/*
* if we need to unlock for I/O, do so now.
*/
if (error == EBUSY) {
1998-03-09 03:58:55 +03:00
uvmfault_unlockall(ufi, amap, NULL, NULL);
1998-03-09 03:58:55 +03:00
/* locked: uobj */
npages = 1;
error = (*uobj->pgops->pgo_get)(uobj,
va - ufi->entry->start + ufi->entry->offset,
&pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_SYNCIO);
1998-03-09 03:58:55 +03:00
/* locked: <nothing> */
2001-05-25 08:06:11 +04:00
if (error) {
if (error == EAGAIN) {
tsleep(&lbolt, PVM, "fltagain2", 0);
return (0);
2001-05-25 08:06:11 +04:00
}
return (-1);
1998-03-09 03:58:55 +03:00
}
/*
* pgo_get was a success. attempt to relock everything.
*/
locked = uvmfault_relock(ufi);
if (locked && amap)
amap_lock(amap);
1998-03-09 03:58:55 +03:00
simple_lock(&uobj->vmobjlock);
/*
* verify that the page has not be released and re-verify
* that amap slot is still free. if there is a problem we
* drop our lock (thus force a lookup refresh/retry).
*/
2001-05-25 08:06:11 +04:00
1998-03-09 03:58:55 +03:00
if ((pg->flags & PG_RELEASED) != 0 ||
(locked && amap && amap_lookup(&ufi->entry->aref,
ufi->orig_rvaddr - ufi->entry->start))) {
1998-03-09 03:58:55 +03:00
if (locked)
uvmfault_unlockall(ufi, amap, NULL, NULL);
locked = FALSE;
2001-05-25 08:06:11 +04:00
}
1998-03-09 03:58:55 +03:00
/*
* didn't get the lock? release the page and retry.
*/
if (locked == FALSE) {
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
if (pg->flags & PG_WANTED) {
wakeup(pg);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
}
1998-03-09 03:58:55 +03:00
if (pg->flags & PG_RELEASED) {
uvm_lock_pageq();
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uvm_pagefree(pg);
uvm_unlock_pageq();
1998-03-09 03:58:55 +03:00
return (0);
}
uvm_lock_pageq();
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uvm_pageactivate(pg);
1998-03-09 03:58:55 +03:00
uvm_unlock_pageq();
pg->flags &= ~(PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
simple_unlock(&uobj->vmobjlock);
return (0);
}
}
/*
* at this point we have the page we want ("pg") marked PG_BUSY for us
* and we have all data structures locked. do the loanout. page can
1998-03-09 03:58:55 +03:00
* not be PG_RELEASED (we caught this above).
*/
if ((flags & UVM_LOAN_TOANON) == 0) {
uvm_loanuobjpages(&pg, 1);
**output = pg;
(*output)++;
return (1);
1998-03-09 03:58:55 +03:00
}
/*
* must be a loan to an anon. check to see if there is already
* an anon associated with this page. if so, then just return
2001-05-25 08:06:11 +04:00
* a reference to this object. the page should already be
1998-03-09 03:58:55 +03:00
* mapped read-only because it is already on loan.
*/
if (pg->uanon) {
anon = pg->uanon;
simple_lock(&anon->an_lock);
anon->an_ref++;
simple_unlock(&anon->an_lock);
if (pg->flags & PG_WANTED) {
wakeup(pg);
}
1998-03-09 03:58:55 +03:00
pg->flags &= ~(PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
**output = anon;
(*output)++;
return (1);
1998-03-09 03:58:55 +03:00
}
2001-05-25 08:06:11 +04:00
1998-03-09 03:58:55 +03:00
/*
* need to allocate a new anon
*/
anon = uvm_analloc();
if (anon == NULL) {
if (pg->flags & PG_WANTED) {
wakeup(pg);
}
1998-03-09 03:58:55 +03:00
pg->flags &= ~(PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
uvmfault_unlockall(ufi, amap, uobj, NULL);
return (-1);
1998-03-09 03:58:55 +03:00
}
anon->u.an_page = pg;
pg->uanon = anon;
uvm_lock_pageq();
if (pg->loan_count == 0) {
pmap_page_protect(pg, VM_PROT_READ);
}
1998-03-09 03:58:55 +03:00
pg->loan_count++;
uvm_pageactivate(pg);
uvm_unlock_pageq();
if (pg->flags & PG_WANTED) {
wakeup(pg);
}
1998-03-09 03:58:55 +03:00
pg->flags &= ~(PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(pg, NULL);
simple_unlock(&anon->an_lock);
**output = anon;
(*output)++;
return (1);
}
/*
* uvm_loanzero: loan a zero-fill page out
*
* => called with map, amap, uobj locked
* => return value:
* -1 = fatal error, everything is unlocked, abort.
* 0 = lookup in ufi went stale, everything unlocked, relookup and
* try again
* 1 = got it, everything still locked
*/
static struct uvm_object uvm_loanzero_object;
static int
1998-03-09 03:58:55 +03:00
uvm_loanzero(ufi, output, flags)
struct uvm_faultinfo *ufi;
void ***output;
int flags;
{
1998-03-09 03:58:55 +03:00
struct vm_anon *anon;
struct vm_page *pg;
struct uvm_object *uobj = ufi->entry->object.uvm_obj;
struct vm_amap *amap = ufi->entry->aref.ar_amap;
1998-03-09 03:58:55 +03:00
simple_lock(&uvm_loanzero_object.vmobjlock);
/*
* first, get ahold of our single zero page.
*/
if (__predict_false((pg =
TAILQ_FIRST(&uvm_loanzero_object.memq)) == NULL)) {
while ((pg = uvm_pagealloc(&uvm_loanzero_object, 0, NULL,
UVM_PGA_ZERO)) == NULL) {
simple_unlock(&uvm_loanzero_object.vmobjlock);
uvmfault_unlockall(ufi, amap, uobj, NULL);
uvm_wait("loanzero");
if (!uvmfault_relock(ufi)) {
return (0);
}
if (amap) {
amap_lock(amap);
}
if (uobj) {
simple_lock(&uobj->vmobjlock);
}
simple_lock(&uvm_loanzero_object.vmobjlock);
1998-03-09 03:58:55 +03:00
}
2001-05-25 08:06:11 +04:00
/* got a zero'd page. */
pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
pg->flags |= PG_RDONLY;
1998-03-09 03:58:55 +03:00
UVM_PAGE_OWN(pg, NULL);
}
if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */
uvm_lock_pageq();
pg->loan_count++;
uvm_unlock_pageq();
simple_unlock(&uvm_loanzero_object.vmobjlock);
1998-03-09 03:58:55 +03:00
**output = pg;
(*output)++;
return (1);
1998-03-09 03:58:55 +03:00
}
/*
* loaning to an anon. check to see if there is already an anon
* associated with this page. if so, then just return a reference
* to this object.
*/
if (pg->uanon) {
anon = pg->uanon;
simple_lock(&anon->an_lock);
anon->an_ref++;
simple_unlock(&anon->an_lock);
simple_unlock(&uvm_loanzero_object.vmobjlock);
**output = anon;
(*output)++;
return (1);
}
1998-03-09 03:58:55 +03:00
/*
* need to allocate a new anon
*/
1998-03-09 03:58:55 +03:00
anon = uvm_analloc();
if (anon == NULL) {
/* out of swap causes us to fail */
simple_unlock(&uvm_loanzero_object.vmobjlock);
uvmfault_unlockall(ufi, amap, uobj, NULL);
return (-1);
1998-03-09 03:58:55 +03:00
}
anon->u.an_page = pg;
pg->uanon = anon;
1998-03-09 03:58:55 +03:00
uvm_lock_pageq();
pg->loan_count++;
1998-03-09 03:58:55 +03:00
uvm_pageactivate(pg);
uvm_unlock_pageq();
simple_unlock(&uvm_loanzero_object.vmobjlock);
1998-03-09 03:58:55 +03:00
**output = anon;
(*output)++;
return (1);
}
/*
* uvm_unloananon: kill loans on anons (basically a normal ref drop)
*
* => we expect all our resources to be unlocked
*/
static void
uvm_unloananon(aloans, nanons)
1998-03-09 03:58:55 +03:00
struct vm_anon **aloans;
int nanons;
{
1998-03-09 03:58:55 +03:00
struct vm_anon *anon;
1998-03-09 03:58:55 +03:00
while (nanons-- > 0) {
int refs;
1998-03-09 03:58:55 +03:00
anon = *aloans++;
simple_lock(&anon->an_lock);
refs = --anon->an_ref;
simple_unlock(&anon->an_lock);
if (refs == 0) {
uvm_anfree(anon);
1998-03-09 03:58:55 +03:00
}
}
}
/*
* uvm_unloanpage: kill loans on pages loaned out to the kernel
*
* => we expect all our resources to be unlocked
*/
static void
uvm_unloanpage(ploans, npages)
1998-03-09 03:58:55 +03:00
struct vm_page **ploans;
int npages;
{
1998-03-09 03:58:55 +03:00
struct vm_page *pg;
struct simplelock *slock;
1998-03-09 03:58:55 +03:00
uvm_lock_pageq();
while (npages-- > 0) {
pg = *ploans++;
1998-03-09 03:58:55 +03:00
/*
* do a little dance to acquire the object or anon lock
* as appropriate. we are locking in the wrong order,
* so we have to do a try-lock here.
*/
slock = NULL;
while (pg->uobject != NULL || pg->uanon != NULL) {
if (pg->uobject != NULL) {
slock = &pg->uobject->vmobjlock;
} else {
slock = &pg->uanon->an_lock;
}
if (simple_lock_try(slock)) {
break;
}
uvm_unlock_pageq();
uvm_lock_pageq();
slock = NULL;
}
/*
* drop our loan. if page is owned by an anon but
* PQ_ANON is not set, the page was loaned to the anon
* from an object which dropped ownership, so resolve
* this by turning the anon's loan into real ownership
* (ie. decrement loan_count again and set PQ_ANON).
* after all this, if there are no loans left, put the
* page back a paging queue (if the page is owned by
* an anon) or free it (if the page is now unowned).
1998-03-09 03:58:55 +03:00
*/
KASSERT(pg->loan_count > 0);
pg->loan_count--;
if (pg->uobject == NULL && pg->uanon != NULL &&
(pg->pqflags & PQ_ANON) == 0) {
KASSERT(pg->loan_count > 0);
pg->loan_count--;
pg->pqflags |= PQ_ANON;
}
if (pg->loan_count == 0) {
if (pg->uobject == NULL && pg->uanon == NULL) {
KASSERT((pg->flags & PG_BUSY) == 0);
uvm_pagefree(pg);
} else {
uvm_pageactivate(pg);
}
} else if (pg->loan_count == 1 && pg->uobject != NULL &&
pg->uanon != NULL) {
uvm_pageactivate(pg);
}
if (slock != NULL) {
simple_unlock(slock);
1998-03-09 03:58:55 +03:00
}
}
uvm_unlock_pageq();
}
/*
 * uvm_unloan: kill loans on pages or anons.
 *
 * => v: array of loaned items; treated as an array of anons when
 *	UVM_LOAN_TOANON is set in flags, as an array of pages otherwise
 * => npages: number of entries in that array
 */
void
uvm_unloan(void *v, int npages, int flags)
{
	/* no TOANON flag: the loans are pages loaned to the kernel */
	if ((flags & UVM_LOAN_TOANON) == 0) {
		uvm_unloanpage(v, npages);
		return;
	}

	uvm_unloananon(v, npages);
}
/*
 * ulz_put: the "put" method of the minimal pager for uvm_loanzero_object.
 *
 * The zero page can end up on a paging queue, and the page daemon
 * will want to call pgo_put when it encounters the page on the
 * inactive list, so a "put" method has to exist.
 *
 * => uobj must be uvm_loanzero_object
 * => uobj's lock is released before returning, on every path
 * => start and stop are not examined
 * => always returns 0
 */
static int
ulz_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct vm_page *zpg;

	KDASSERT(uobj == &uvm_loanzero_object);

	/*
	 * There is only work to do when we are asked to free pages.
	 * Even then we never actually free the zero page; we just
	 * reactivate it (if an anon refers to it) or take it off the
	 * paging queues.
	 */
	if (flags & PGO_FREE) {
		zpg = TAILQ_FIRST(&uobj->memq);
		KASSERT(zpg != NULL);
		KASSERT(TAILQ_NEXT(zpg, listq) == NULL);

		uvm_lock_pageq();
		if (zpg->uanon == NULL) {
			uvm_pagedequeue(zpg);
		} else {
			uvm_pageactivate(zpg);
		}
		uvm_unlock_pageq();
	}

	simple_unlock(&uobj->vmobjlock);
	return 0;
}
/*
 * Pager operations table for uvm_loanzero_object.  Only the "put"
 * slot is filled in (with ulz_put); every other slot is left NULL.
 */
static struct uvm_pagerops ulz_pager = {
NULL, /* init: not provided */
NULL, /* reference: not provided */
NULL, /* detach: not provided */
NULL, /* fault: not provided */
NULL, /* get: not provided */
ulz_put, /* put: see ulz_put */
};
/*
* uvm_loan_init(): initialize the uvm_loan() facility.
*/
void
uvm_loan_init(void)
{
simple_lock_init(&uvm_loanzero_object.vmobjlock);
TAILQ_INIT(&uvm_loanzero_object.memq);
uvm_loanzero_object.pgops = &ulz_pager;
}
/*
 * uvm_loanbreak: break loan on a uobj page
 *
 * => called with uobj locked
 * => the page should be busy
 * => return value:
 *	newly allocated page if succeeded,
 *	NULL if no replacement page could be allocated
 */
struct vm_page *
uvm_loanbreak(struct vm_page *uobjpage)
{
	struct uvm_object *owner;
	struct vm_page *newpg;
	voff_t off;

	owner = uobjpage->uobject;
	KASSERT(owner != NULL);
	LOCK_ASSERT(simple_lock_held(&owner->vmobjlock));
	KASSERT(uobjpage->flags & PG_BUSY);

	/* get a fresh page that is not owned by anything yet */
	newpg = uvm_pagealloc(NULL, 0, NULL, 0);
	if (newpg == NULL) {
		return NULL;
	}

	/*
	 * duplicate the contents of the loaned page into the fresh
	 * one, then drop the fake/clean flags of the fresh page
	 * (it stays busy).  remove the old page from all pmaps so
	 * that it has to be reloaded.
	 */
	uvm_pagecopy(uobjpage, newpg);	/* old -> new */
	newpg->flags &= ~(PG_FAKE|PG_CLEAN);
	pmap_page_protect(uobjpage, VM_PROT_NONE);

	/* wake any waiters on the old page, then un-busy it */
	if (uobjpage->flags & PG_WANTED) {
		wakeup(uobjpage);
	}
	/* the object is still locked here */
	uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
	UVM_PAGE_OWN(uobjpage, NULL);

	/* the renaming of the pages is done under the page queue lock */
	uvm_lock_pageq();
	off = uobjpage->offset;
	uvm_pagerealloc(uobjpage, NULL, 0);

	/*
	 * no anon refers to the old page any more (i.e. this was
	 * an O->K loan being broken): take it off any pageq's.
	 */
	if (uobjpage->uanon == NULL) {
		uvm_pagedequeue(uobjpage);
	}

	/*
	 * from here on we have absolutely no control over uobjpage.
	 * put the new page in its place in the object.
	 */
	uvm_pageactivate(newpg);
	uvm_pagerealloc(newpg, owner, off);
	uvm_unlock_pageq();

	/*
	 * done!  the loan is broken and the new page is PG_BUSY.
	 * it can now replace uobjpage.
	 */
	return newpg;
}