NetBSD/sys/uvm/uvm_pdaemon.c

1042 lines
23 KiB
C
Raw Normal View History

/* $NetBSD: uvm_pdaemon.c,v 1.97 2008/12/13 11:26:57 ad Exp $ */
2001-05-25 08:06:11 +04:00
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
2001-05-25 08:06:11 +04:00
* Copyright (c) 1991, 1993, The Regents of the University of California.
*
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Charles D. Cranor,
2001-05-25 08:06:11 +04:00
* Washington University, the University of California, Berkeley and
* its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
1998-02-07 14:07:38 +03:00
* from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
2001-05-25 08:06:11 +04:00
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
2001-05-25 08:06:11 +04:00
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
2001-05-25 08:06:11 +04:00
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* uvm_pdaemon.c: the page daemon
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.97 2008/12/13 11:26:57 ad Exp $");
#include "opt_uvmhist.h"
2005-11-29 18:45:28 +03:00
#include "opt_readahead.h"
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
1998-07-24 00:36:09 +04:00
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>
#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
/*
2002-01-21 17:42:26 +03:00
* UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
* in a pass thru the inactive list when swap is full. the value should be
* "small"... if it's too large we'll cycle the active pages thru the inactive
* queue too quickly for them to be referenced and avoid being freed.
*/
2008-01-02 14:48:20 +03:00
/* uvmpd_scan_queue() leaves its scan loop once dirtyreacts equals this. */
#define UVMPD_NUMDIRTYREACTS 16
2008-01-02 14:48:20 +03:00
/*
 * UVMPD_NUMTRYLOCKOWNER: once uvmpd_scan_queue() has counted more than
 * this many failed attempts to trylock a page's owner, it drops
 * uvm_pageqlock, pauses briefly, and resets the count.
 */
#define UVMPD_NUMTRYLOCKOWNER 16
/*
* local prototypes
*/
2005-06-27 06:19:48 +04:00
/* uvmpd_scan: run from the uvm_pageout() main loop when a scan is needed. */
static void uvmpd_scan(void);
/* uvmpd_scan_queue: scan the candidate list for pages to clean or free. */
static void uvmpd_scan_queue(void);
2005-06-27 06:19:48 +04:00
/* uvmpd_tune: recompute uvmexp.freemin, uvmexp.freetarg and uvmexp.wiredmax. */
static void uvmpd_tune(void);
2008-01-02 14:48:20 +03:00
/*
 * uvm_pagedaemon_waiters: bumped by uvm_wait() before it sleeps and
 * reset to zero whenever the sleepers on &uvmexp.free are woken.
 * the pagedaemon goes to sleep when it is zero.  every access in
 * this file is made with uvm_fpageqlock held.
 */
unsigned int uvm_pagedaemon_waiters;
/*
* XXX hack to avoid hangs when large processes fork.
*/
/* uvmpd_tune() adds this to uvmexp.freetarg and atomically resets it to 0. */
u_int uvm_extrapages;
/*
* uvm_wait: wait (sleep) for the page daemon to free some pages
*
* => should be called with all locks released
* => should _not_ be called by the page daemon (to avoid deadlock)
*/
1999-11-05 00:51:42 +03:00
void
2005-06-27 06:19:48 +04:00
uvm_wait(const char *wmsg)
1998-03-09 03:58:55 +03:00
{
int timo = 0;
2008-01-02 14:48:20 +03:00
mutex_spin_enter(&uvm_fpageqlock);
1998-03-09 03:58:55 +03:00
/*
* check for page daemon going to sleep (waiting for itself)
*/
if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
1998-03-09 03:58:55 +03:00
/*
* now we have a problem: the pagedaemon wants to go to
* sleep until it frees more memory. but how can it
* free more memory if it is asleep? that is a deadlock.
* we have two options:
* [1] panic now
* [2] put a timeout on the sleep, thus causing the
* pagedaemon to only pause (rather than sleep forever)
*
* note that option [2] will only help us if we get lucky
* and some other process on the system breaks the deadlock
* by exiting or freeing memory (thus allowing the pagedaemon
* to continue). for now we panic if DEBUG is defined,
* otherwise we hope for the best with option [2] (better
* yet, this should never happen in the first place!).
*/
printf("pagedaemon: deadlock detected!\n");
timo = hz >> 3; /* set timeout */
#if defined(DEBUG)
1998-03-09 03:58:55 +03:00
/* DEBUG: panic so we can debug it */
panic("pagedaemon deadlock");
#endif
1998-03-09 03:58:55 +03:00
}
2008-01-02 14:48:20 +03:00
uvm_pagedaemon_waiters++;
wakeup(&uvm.pagedaemon); /* wake the daemon! */
2008-01-02 14:48:20 +03:00
UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
}
/*
* uvm_kick_pdaemon: perform checks to determine if we need to
* give the pagedaemon a nudge, and do so if necessary.
2008-01-02 14:48:20 +03:00
*
* => called with uvm_fpageqlock held.
*/
void
uvm_kick_pdaemon(void)
{
2008-01-02 14:48:20 +03:00
KASSERT(mutex_owned(&uvm_fpageqlock));
if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
(uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
uvmpdpol_needsscan_p())) {
wakeup(&uvm.pagedaemon);
}
}
/*
* uvmpd_tune: tune paging parameters
*
* => called whenever memory is added (or removed?) to the system
* => caller must call with page queues locked
*/
2005-06-27 06:19:48 +04:00
static void
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uvmpd_tune(void)
{
int val;
1998-03-09 03:58:55 +03:00
UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
/*
* try to keep 0.5% of available RAM free, but limit to between
* 128k and 1024k per-CPU. XXX: what are these values good for?
*/
val = uvmexp.npages / 200;
val = MAX(val, (128*1024) >> PAGE_SHIFT);
val = MIN(val, (1024*1024) >> PAGE_SHIFT);
val *= ncpu;
/* Make sure there's always a user page free. */
if (val < uvmexp.reserve_kernel + 1)
val = uvmexp.reserve_kernel + 1;
uvmexp.freemin = val;
/* Calculate free target. */
val = (uvmexp.freemin * 4) / 3;
if (val <= uvmexp.freemin)
val = uvmexp.freemin + 1;
uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);
1998-03-09 03:58:55 +03:00
uvmexp.wiredmax = uvmexp.npages / 3;
UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}
/*
* uvm_pageout: the main loop for the pagedaemon
*/
1998-03-09 03:58:55 +03:00
void
uvm_pageout(void *arg)
{
int bufcnt, npages = 0;
int extrapages = 0;
struct pool *pp;
uint64_t where;
1998-03-09 03:58:55 +03:00
UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
1998-03-09 03:58:55 +03:00
UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
/*
* ensure correct priority and set paging parameters...
*/
uvm.pagedaemon_lwp = curlwp;
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
1998-03-09 03:58:55 +03:00
npages = uvmexp.npages;
uvmpd_tune();
2008-01-02 14:48:20 +03:00
mutex_exit(&uvm_pageqlock);
1998-03-09 03:58:55 +03:00
/*
* main loop
*/
for (;;) {
bool needsscan, needsfree;
1998-03-09 03:58:55 +03:00
2008-01-02 14:48:20 +03:00
mutex_spin_enter(&uvm_fpageqlock);
if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) {
UVMHIST_LOG(pdhist," <<SLEEPING>>",0,0,0,0);
UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
&uvm_fpageqlock, false, "pgdaemon", 0);
uvmexp.pdwoke++;
UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
} else {
mutex_spin_exit(&uvm_fpageqlock);
}
1998-03-09 03:58:55 +03:00
/*
* now lock page queues and recompute inactive count
*/
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
1998-03-09 03:58:55 +03:00
npages = uvmexp.npages;
extrapages = uvm_extrapages;
2008-01-02 14:48:20 +03:00
mutex_spin_enter(&uvm_fpageqlock);
1998-03-09 03:58:55 +03:00
uvmpd_tune();
2008-01-02 14:48:20 +03:00
mutex_spin_exit(&uvm_fpageqlock);
1998-03-09 03:58:55 +03:00
}
uvmpdpol_tune();
1998-03-09 03:58:55 +03:00
/*
* Estimate a hint. Note that bufmem are returned to
* system only when entire pool page is empty.
*/
2008-01-02 14:48:20 +03:00
mutex_spin_enter(&uvm_fpageqlock);
bufcnt = uvmexp.freetarg - uvmexp.free;
if (bufcnt < 0)
bufcnt = 0;
UVMHIST_LOG(pdhist," free/ftarg=%d/%d",
uvmexp.free, uvmexp.freetarg, 0,0);
1998-03-09 03:58:55 +03:00
needsfree = uvmexp.free + uvmexp.paging < uvmexp.freetarg;
needsscan = needsfree || uvmpdpol_needsscan_p();
2008-01-02 14:48:20 +03:00
1998-03-09 03:58:55 +03:00
/*
* scan if needed
*/
if (needsscan) {
mutex_spin_exit(&uvm_fpageqlock);
1998-03-09 03:58:55 +03:00
uvmpd_scan();
mutex_spin_enter(&uvm_fpageqlock);
}
/*
* if there's any free memory to be had,
* wake up any waiters.
*/
if (uvmexp.free > uvmexp.reserve_kernel ||
uvmexp.paging == 0) {
wakeup(&uvmexp.free);
2008-01-02 14:48:20 +03:00
uvm_pagedaemon_waiters = 0;
}
2008-01-02 14:48:20 +03:00
mutex_spin_exit(&uvm_fpageqlock);
1998-03-09 03:58:55 +03:00
/*
* scan done. unlock page queues (the only lock we are holding)
1998-03-09 03:58:55 +03:00
*/
2008-01-02 14:48:20 +03:00
mutex_exit(&uvm_pageqlock);
/*
* if we don't need free memory, we're done.
*/
if (!needsfree)
continue;
/*
* start draining pool resources now that we're not
* holding any locks.
*/
pool_drain_start(&pp, &where);
/*
* kill unused metadata buffers.
*/
2008-01-02 14:48:20 +03:00
mutex_enter(&bufcache_lock);
buf_drain(bufcnt << PAGE_SHIFT);
2008-01-02 14:48:20 +03:00
mutex_exit(&bufcache_lock);
/*
* complete draining the pools.
*/
pool_drain_end(pp, where);
}
/*NOTREACHED*/
}
/*
* uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
*/
void
uvm_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *donebuf;

	/*
	 * the work item is reinterpreted as the buf itself; the
	 * assertion checks that b_work really sits at the address we
	 * were handed.
	 */
	donebuf = (void *)wk;
	KASSERT(wk == &donebuf->b_work);

	/*
	 * the i/o is done: run the buffer's completion handler.
	 */
	(*donebuf->b_iodone)(donebuf);
}
/*
 * uvm_pageout_start: account for npages pages whose pageout is starting.
 *
 * => takes and releases uvm_fpageqlock itself.
 */
void
uvm_pageout_start(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	uvmexp.paging = uvmexp.paging + npages;
	mutex_spin_exit(&uvm_fpageqlock);
}
void
uvm_pageout_done(int npages)
{
mutex_spin_enter(&uvm_fpageqlock);
KASSERT(uvmexp.paging >= npages);
uvmexp.paging -= npages;
/*
* wake up either of pagedaemon or LWPs waiting for it.
*/
if (uvmexp.free <= uvmexp.reserve_kernel) {
wakeup(&uvm.pagedaemon);
} else {
wakeup(&uvmexp.free);
2008-01-02 14:48:20 +03:00
uvm_pagedaemon_waiters = 0;
1998-03-09 03:58:55 +03:00
}
2008-01-02 14:48:20 +03:00
mutex_spin_exit(&uvm_fpageqlock);
}
/*
* uvmpd_trylockowner: trylock the page's owner.
*
* => called with pageq locked.
* => resolve orphaned O->A loaned page.
2008-01-02 14:48:20 +03:00
* => return the locked mutex on success. otherwise, return NULL.
*/
2008-01-02 14:48:20 +03:00
kmutex_t *
uvmpd_trylockowner(struct vm_page *pg)
{
struct uvm_object *uobj = pg->uobject;
2008-01-02 14:48:20 +03:00
kmutex_t *slock;
KASSERT(mutex_owned(&uvm_pageqlock));
if (uobj != NULL) {
slock = &uobj->vmobjlock;
} else {
struct vm_anon *anon = pg->uanon;
KASSERT(anon != NULL);
slock = &anon->an_lock;
}
2008-01-02 14:48:20 +03:00
if (!mutex_tryenter(slock)) {
return NULL;
}
if (uobj == NULL) {
/*
* set PQ_ANON if it isn't set already.
*/
if ((pg->pqflags & PQ_ANON) == 0) {
KASSERT(pg->loan_count > 0);
pg->loan_count--;
pg->pqflags |= PQ_ANON;
/* anon now owns it */
}
}
return slock;
}
2006-02-12 12:19:59 +03:00
#if defined(VMSWAP)
/*
 * swapcluster: a batch of pages being gathered for one swap write.
 */
struct swapcluster {
	/* first swap slot of the cluster; 0 means no cluster is open */
	int swc_slot;
	/* number of slots obtained by swapcluster_allocslots() */
	int swc_nallocated;
	/* number of pages added so far (also the next free index) */
	int swc_nused;
	/* the pages added so far, in slot order */
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};
/*
 * swapcluster_init: put a cluster into the "nothing allocated, nothing
 * queued" state.  swc_nallocated is left untouched.
 */
static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_nused = 0;
	swc->swc_slot = 0;
}
static int
swapcluster_allocslots(struct swapcluster *swc)
{
int slot;
int npages;
if (swc->swc_slot != 0) {
return 0;
}
/* Even with strange MAXPHYS, the shift
implicitly rounds down to a page. */
npages = MAXPHYS >> PAGE_SHIFT;
2007-02-22 09:05:00 +03:00
slot = uvm_swap_alloc(&npages, true);
2006-02-12 12:19:59 +03:00
if (slot == 0) {
return ENOMEM;
}
swc->swc_slot = slot;
swc->swc_nallocated = npages;
swc->swc_nused = 0;
return 0;
}
static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
int slot;
struct uvm_object *uobj;
KASSERT(swc->swc_slot != 0);
KASSERT(swc->swc_nused < swc->swc_nallocated);
KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);
slot = swc->swc_slot + swc->swc_nused;
uobj = pg->uobject;
if (uobj == NULL) {
2008-01-02 14:48:20 +03:00
KASSERT(mutex_owned(&pg->uanon->an_lock));
2006-02-12 12:19:59 +03:00
pg->uanon->an_swslot = slot;
} else {
int result;
2008-01-02 14:48:20 +03:00
KASSERT(mutex_owned(&uobj->vmobjlock));
2006-02-12 12:19:59 +03:00
result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
if (result == -1) {
return ENOMEM;
}
}
swc->swc_pages[swc->swc_nused] = pg;
swc->swc_nused++;
return 0;
}
/*
 * swapcluster_flush: start the write for a cluster.
 *
 * => does nothing if no cluster is open.
 * => a cluster that is not yet full is left alone unless "now" is
 *    true; with "now" its unused trailing slots are freed first.
 * => on completion the cluster is reset to the "no cluster" state.
 */
static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int base;
	int queued;
	int reserved;
	int error;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	base = swc->swc_slot;
	queued = swc->swc_nused;
	reserved = swc->swc_nallocated;

	/*
	 * partially filled cluster: keep collecting unless the caller
	 * insists on flushing now.
	 */
	if (queued < reserved && !now) {
		return;
	}

	/*
	 * forced flush of a partial cluster: give the swap blocks we
	 * will not use back.
	 */
	if (queued < reserved) {
		uvm_swap_free(base + queued, reserved - queued);
	}

	/*
	 * start the pageout, if there is anything to write.
	 */
	if (queued > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(queued);
		error = uvm_swap_put(base, swc->swc_pages, queued, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * clear the slot and count to show that no swap-backed
	 * cluster is being built any more.
	 */
	swc->swc_slot = 0;
	swc->swc_nused = 0;
}
/*
 * swapcluster_nused: number of pages currently queued in the cluster.
 */
static int
swapcluster_nused(struct swapcluster *swc)
{
	int queued = swc->swc_nused;

	return queued;
}
/*
* uvmpd_dropswap: free any swap allocated to this page.
*
* => called with owner locked.
2007-02-22 09:05:00 +03:00
* => return true if a page had an associated slot.
*/
static bool
uvmpd_dropswap(struct vm_page *pg)
{
2007-02-22 09:05:00 +03:00
bool result = false;
struct vm_anon *anon = pg->uanon;
if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
uvm_swap_free(anon->an_swslot, 1);
anon->an_swslot = 0;
pg->flags &= ~PG_CLEAN;
2007-02-22 09:05:00 +03:00
result = true;
} else if (pg->pqflags & PQ_AOBJ) {
int slot = uao_set_swslot(pg->uobject,
pg->offset >> PAGE_SHIFT, 0);
if (slot) {
uvm_swap_free(slot, 1);
pg->flags &= ~PG_CLEAN;
2007-02-22 09:05:00 +03:00
result = true;
}
}
return result;
}
/*
* uvmpd_trydropswap: try to free any swap allocated to this page.
*
2007-02-22 09:05:00 +03:00
* => return true if a slot is successfully freed.
*/
bool
uvmpd_trydropswap(struct vm_page *pg)
{
2008-01-02 14:48:20 +03:00
kmutex_t *slock;
bool result;
if ((pg->flags & PG_BUSY) != 0) {
2007-02-22 09:05:00 +03:00
return false;
}
/*
* lock the page's owner.
*/
slock = uvmpd_trylockowner(pg);
if (slock == NULL) {
2007-02-22 09:05:00 +03:00
return false;
}
/*
* skip this page if it's busy.
*/
if ((pg->flags & PG_BUSY) != 0) {
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
2007-02-22 09:05:00 +03:00
return false;
}
result = uvmpd_dropswap(pg);
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
return result;
}
2006-02-12 12:19:59 +03:00
#endif /* defined(VMSWAP) */
/*
* uvmpd_scan_queue: scan a replacement candidate list for pages
* to clean or free.
*
* => called with page queues locked
* => we work on meeting our free target by converting inactive pages
* into free pages.
* => we handle the building of swap-backed clusters
*/
2005-06-27 06:19:48 +04:00
static void
uvmpd_scan_queue(void)
1998-03-09 03:58:55 +03:00
{
struct vm_page *p;
1998-03-09 03:58:55 +03:00
struct uvm_object *uobj;
struct vm_anon *anon;
#if defined(VMSWAP)
2006-02-12 12:19:59 +03:00
struct swapcluster swc;
#endif /* defined(VMSWAP) */
int dirtyreacts;
2008-01-02 14:48:20 +03:00
int lockownerfail;
kmutex_t *slock;
UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);
1998-03-09 03:58:55 +03:00
/*
* swslot is non-zero if we are building a swap cluster. we want
* to stay in the loop while we have a page to scan or we have
1998-03-09 03:58:55 +03:00
* a swap-cluster to build.
*/
2006-02-12 12:19:59 +03:00
#if defined(VMSWAP)
swapcluster_init(&swc);
#endif /* defined(VMSWAP) */
dirtyreacts = 0;
2008-01-02 14:48:20 +03:00
lockownerfail = 0;
uvmpdpol_scaninit();
while (/* CONSTCOND */ 1) {
2006-02-12 12:19:59 +03:00
/*
* see if we've met the free target.
*/
2008-01-02 14:48:20 +03:00
if (uvmexp.free + uvmexp.paging
#if defined(VMSWAP)
+ swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
>= uvmexp.freetarg << 2 ||
2006-02-12 12:19:59 +03:00
dirtyreacts == UVMPD_NUMDIRTYREACTS) {
UVMHIST_LOG(pdhist," met free target: "
"exit loop", 0, 0, 0, 0);
break;
}
p = uvmpdpol_selectvictim();
if (p == NULL) {
break;
}
KASSERT(uvmpdpol_pageisqueued_p(p));
KASSERT(p->wire_count == 0);
2006-02-12 12:19:59 +03:00
/*
* we are below target and have a new page to consider.
2006-02-12 12:19:59 +03:00
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2006-02-12 12:19:59 +03:00
anon = p->uanon;
uobj = p->uobject;
1998-03-09 03:58:55 +03:00
2006-02-12 12:19:59 +03:00
/*
* first we attempt to lock the object that this page
* belongs to. if our attempt fails we skip on to
* the next page (no harm done). it is important to
* "try" locking the object as we are locking in the
* wrong order (pageq -> object) and we don't want to
* deadlock.
*
* the only time we expect to see an ownerless page
* (i.e. a page with no uobject and !PQ_ANON) is if an
* anon has loaned a page from a uvm_object and the
* uvm_object has dropped the ownership. in that
* case, the anon can "take over" the loaned page
* and make it its own.
*/
slock = uvmpd_trylockowner(p);
if (slock == NULL) {
2008-01-02 14:48:20 +03:00
/*
* yield cpu to make a chance for an LWP holding
* the lock run. otherwise we can busy-loop too long
* if the page queue is filled with a lot of pages
* from few objects.
*/
lockownerfail++;
if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
mutex_exit(&uvm_pageqlock);
/* XXX Better than yielding but inadequate. */
kpause("livelock", false, 1, NULL);
mutex_enter(&uvm_pageqlock);
lockownerfail = 0;
}
continue;
}
if (p->flags & PG_BUSY) {
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
uvmexp.pdbusy++;
continue;
}
2006-02-12 12:19:59 +03:00
/* does the page belong to an object? */
if (uobj != NULL) {
uvmexp.pdobscan++;
} else {
#if defined(VMSWAP)
KASSERT(anon != NULL);
uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
2006-02-12 12:19:59 +03:00
panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
2006-02-12 12:19:59 +03:00
}
1998-03-09 03:58:55 +03:00
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2006-02-12 12:19:59 +03:00
/*
* we now have the object and the page queues locked.
* if the page is not swap-backed, call the object's
* pager to flush and free the page.
*/
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2005-11-29 18:45:28 +03:00
#if defined(READAHEAD_STATS)
if ((p->pqflags & PQ_READAHEAD) != 0) {
p->pqflags &= ~PQ_READAHEAD;
2006-02-12 12:19:59 +03:00
uvm_ra_miss.ev_count++;
}
2005-11-29 18:45:28 +03:00
#endif /* defined(READAHEAD_STATS) */
2006-02-12 12:19:59 +03:00
if ((p->pqflags & PQ_SWAPBACKED) == 0) {
KASSERT(uobj != NULL);
2008-01-02 14:48:20 +03:00
mutex_exit(&uvm_pageqlock);
2006-02-12 12:19:59 +03:00
(void) (uobj->pgops->pgo_put)(uobj, p->offset,
p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
2006-02-12 12:19:59 +03:00
continue;
}
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
2006-02-12 12:19:59 +03:00
/*
* the page is swap-backed. remove all the permissions
* from the page so we can sync the modified info
* without any race conditions. if the page is clean
* we can free it now and continue.
*/
2006-02-12 12:19:59 +03:00
pmap_page_protect(p, VM_PROT_NONE);
if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) {
p->flags &= ~(PG_CLEAN);
}
if (p->flags & PG_CLEAN) {
int slot;
int pageidx;
2006-02-12 12:19:59 +03:00
pageidx = p->offset >> PAGE_SHIFT;
uvm_pagefree(p);
uvmexp.pdfreed++;
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
/*
2006-02-12 12:19:59 +03:00
* for anons, we need to remove the page
* from the anon ourselves. for aobjs,
* pagefree did that for us.
*/
2006-02-12 12:19:59 +03:00
if (anon) {
KASSERT(anon->an_swslot != 0);
anon->an_page = NULL;
slot = anon->an_swslot;
} else {
slot = uao_find_swslot(uobj, pageidx);
1998-03-09 03:58:55 +03:00
}
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
2006-02-12 12:19:59 +03:00
if (slot > 0) {
/* this page is now only in swap. */
mutex_enter(&uvm_swap_data_lock);
2006-02-12 12:19:59 +03:00
KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
uvmexp.swpgonly++;
mutex_exit(&uvm_swap_data_lock);
}
2006-02-12 12:19:59 +03:00
continue;
}
#if defined(VMSWAP)
2006-02-12 12:19:59 +03:00
/*
* this page is dirty, skip it if we'll have met our
* free target when all the current pageouts complete.
*/
1998-03-09 03:58:55 +03:00
2006-02-12 12:19:59 +03:00
if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) {
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
2006-02-12 12:19:59 +03:00
continue;
}
2006-02-12 12:19:59 +03:00
/*
* free any swap space allocated to the page since
* we'll have to write it again with its new data.
*/
1998-03-09 03:58:55 +03:00
uvmpd_dropswap(p);
1998-03-09 03:58:55 +03:00
/*
2006-02-12 12:19:59 +03:00
* start new swap pageout cluster (if necessary).
*
* if swap is full reactivate this page so that
* we eventually cycle all pages through the
* inactive queue.
1998-03-09 03:58:55 +03:00
*/
2006-02-12 12:19:59 +03:00
if (swapcluster_allocslots(&swc)) {
dirtyreacts++;
uvm_pageactivate(p);
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
2006-02-12 12:19:59 +03:00
continue;
1998-03-09 03:58:55 +03:00
}
/*
2006-02-12 12:19:59 +03:00
* at this point, we're definitely going to reuse this
* page. mark the page busy and delayed-free.
* we should remove the page from the page queues
* so we don't ever look at it again.
* adjust counters and such.
1998-03-09 03:58:55 +03:00
*/
2006-02-12 12:19:59 +03:00
p->flags |= PG_BUSY;
UVM_PAGE_OWN(p, "scan_queue");
2006-02-12 12:19:59 +03:00
p->flags |= PG_PAGEOUT;
uvm_pagedequeue(p);
uvmexp.pgswapout++;
2008-01-02 14:48:20 +03:00
mutex_exit(&uvm_pageqlock);
1998-03-09 03:58:55 +03:00
/*
2006-02-12 12:19:59 +03:00
* add the new page to the cluster.
1998-03-09 03:58:55 +03:00
*/
2006-02-12 12:19:59 +03:00
if (swapcluster_add(&swc, p)) {
p->flags &= ~(PG_BUSY|PG_PAGEOUT);
UVM_PAGE_OWN(p, NULL);
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
dirtyreacts++;
2006-02-12 12:19:59 +03:00
uvm_pageactivate(p);
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
2006-02-12 12:19:59 +03:00
continue;
}
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
2006-02-12 12:19:59 +03:00
2007-02-22 09:05:00 +03:00
swapcluster_flush(&swc, false);
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
2006-02-12 12:19:59 +03:00
1998-03-09 03:58:55 +03:00
/*
* the pageout is in progress. bump counters and set up
* for the next loop.
1998-03-09 03:58:55 +03:00
*/
uvmexp.pdpending++;
#else /* defined(VMSWAP) */
uvm_pageactivate(p);
2008-01-02 14:48:20 +03:00
mutex_exit(slock);
#endif /* defined(VMSWAP) */
}
2006-02-12 12:19:59 +03:00
#if defined(VMSWAP)
2008-01-02 14:48:20 +03:00
mutex_exit(&uvm_pageqlock);
2007-02-22 09:05:00 +03:00
swapcluster_flush(&swc, true);
2008-01-02 14:48:20 +03:00
mutex_enter(&uvm_pageqlock);
2006-02-12 12:19:59 +03:00
#endif /* defined(VMSWAP) */
}
/*
* uvmpd_scan: scan the page queues and attempt to meet our targets.
*
* => called with pageq's locked
*/
2005-06-27 06:19:48 +04:00
static void
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uvmpd_scan(void)
{
int swap_shortage, pages_freed;
1998-03-09 03:58:55 +03:00
UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
a whole bunch of changes to improve performance and robustness under load: - remove special treatment of pager_map mappings in pmaps. this is required now, since I've removed the globals that expose the address range. pager_map now uses pmap_kenter_pa() instead of pmap_enter(), so there's no longer any need to special-case it. - eliminate struct uvm_vnode by moving its fields into struct vnode. - rewrite the pageout path. the pager is now responsible for handling the high-level requests instead of only getting control after a bunch of work has already been done on its behalf. this will allow us to UBCify LFS, which needs tighter control over its pages than other filesystems do. writing a page to disk no longer requires making it read-only, which allows us to write wired pages without causing all kinds of havoc. - use a new PG_PAGEOUT flag to indicate that a page should be freed on behalf of the pagedaemon when it's unlocked. this flag is very similar to PG_RELEASED, but unlike PG_RELEASED, PG_PAGEOUT can be cleared if the pageout fails due to eg. an indirect-block buffer being locked. this allows us to remove the "version" field from struct vm_page, and together with shrinking "loan_count" from 32 bits to 16, struct vm_page is now 4 bytes smaller. - no longer use PG_RELEASED for swap-backed pages. if the page is busy because it's being paged out, we can't release the swap slot to be reallocated until that write is complete, but unlike with vnodes we don't keep a count of in-progress writes so there's no good way to know when the write is done. instead, when we need to free a busy swap-backed page, just sleep until we can get it busy ourselves. - implement a fast-path for extending writes which allows us to avoid zeroing new pages. this substantially reduces cpu usage. - encapsulate the data used by the genfs code in a struct genfs_node, which must be the first element of the filesystem-specific vnode data for filesystems which use genfs_{get,put}pages(). 
- eliminate many of the UVM pagerops, since they aren't needed anymore now that the pager "put" operation is a higher-level operation. - enhance the genfs code to allow NFS to use the genfs_{get,put}pages instead of a modified copy. - clean up struct vnode by removing all the fields that used to be used by the vfs_cluster.c code (which we don't use anymore with UBC). - remove kmem_object and mb_object since they were useless. instead of allocating pages to these objects, we now just allocate pages with no object. such pages are mapped in the kernel until they are freed, so we can use the mapping to find the page to free it. this allows us to remove splvm() protection in several places. The sum of all these changes improves write throughput on my decstation 5000/200 to within 1% of the rate of NetBSD 1.5 and reduces the elapsed time for "make release" of a NetBSD 1.5 source tree on my 128MB pc to 10% less than a 1.5 kernel took.
2001-09-16 00:36:31 +04:00
uvmexp.pdrevs++;
1998-03-09 03:58:55 +03:00
/*
* work on meeting our targets. first we work on our free target
* by converting inactive pages into free pages. then we work on
* meeting our inactive target by converting active pages to
* inactive ones.
1998-03-09 03:58:55 +03:00
*/
UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
pages_freed = uvmexp.pdfreed;
uvmpd_scan_queue();
pages_freed = uvmexp.pdfreed - pages_freed;
1998-03-09 03:58:55 +03:00
/*
* detect if we're not going to be able to page anything out
* until we free some swap resources from active pages.
*/
swap_shortage = 0;
if (uvmexp.free < uvmexp.freetarg &&
uvmexp.swpginuse >= uvmexp.swpgavail &&
!uvm_swapisfull() &&
pages_freed == 0) {
swap_shortage = uvmexp.freetarg - uvmexp.free;
}
uvmpdpol_balancequeue(swap_shortage);
/*
* swap out some processes if we are still below the minimum
* free target. we need to unlock the page queues for this.
*/
if (uvmexp.free < uvmexp.freemin && uvmexp.nswapdev != 0 &&
uvm.swapout_enabled) {
uvmexp.pdswout++;
UVMHIST_LOG(pdhist," free %d < min %d: swapout",
uvmexp.free, uvmexp.freemin, 0, 0);
mutex_exit(&uvm_pageqlock);
uvm_swapout_threads();
mutex_enter(&uvm_pageqlock);
}
/*
* if still below the minimum target, try unloading kernel
* modules.
*/
if (uvmexp.free < uvmexp.freemin) {
module_thread_kick();
}
}
/*
 * uvm_reclaimable: decide whether to wait for pagedaemon.
 *
 * => returns true if it seems worthwhile to do uvm_wait.
 * => returns false otherwise; the caller is then expected to give up
 *    (kill the process, fail the allocation, etc.)
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if they amount to at least 1/16 of pageable memory or 5MB,
	 * whichever is smaller, try to reclaim.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory.
	 * XXX ie. pools, traditional buffer cache.
	 */

	filepages = uvmexp.filepages + uvmexp.execpages - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    (5 * 1024 * 1024) >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}
/*
 * uvm_estimatepageable: forward a pageable-memory estimate request.
 *
 * => both pointers are handed to uvmpdpol_estimatepageable() unchanged;
 *    that routine is expected to fill in *active and *inactive
 *    (see its definition for the exact contract)
 */

void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}