NetBSD/sys/vm/vm_pageout.c
ross c297f000b7 NFS swap fix. Most files here are modified purely to add a tsleep() wmesg
string. The actual functional changes are:

	1. in vm_page_alloc(), restore some long lost code by Paul Kranenburg
	that reserves the last few pages for kernel objects, adding an
	additional escape for the pageout daemon.

	2. In vm_wait() (replaces VM_WAIT), recognize the amusing fatal
	deadlock where the pagedaemon sleeps on a channel that only it ever
	wakes up, and add a timeout and printf. Mod 1 should generally
	prevent this from happening in any case.

	3. Fix a livelock in vm_pageout_page() caused by a pre-wakeup of
	page consumers prior to the pageout, which can easily fail over
	memory issues in NFS and IP code. Also, ++ cnt.v_pageouts only
	if the pageout succeeded.
1998-01-31 04:02:39 +00:00

608 lines
16 KiB
C

/* $NetBSD: vm_pageout.c,v 1.28 1998/01/31 04:02:45 ross Exp $ */
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* The proverbial page-out daemon.
*/
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
/*
 * Default for cnt.v_free_min: one twentieth of the free page count at
 * the moment vm_pageout() evaluates it (only when v_free_min is still 0).
 * May be overridden by defining VM_PAGE_FREE_MIN before this point.
 */
#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN (cnt.v_free_count / 20)
#endif
/*
 * Default for cnt.v_free_target: four thirds of cnt.v_free_min, applied
 * by vm_pageout() only when v_free_target is still 0.
 */
#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET ((cnt.v_free_min * 4) / 3)
#endif
/*
 * Lower and upper clamps for cnt.v_free_min.  vm_pageout() divides both
 * by cnt.v_page_size before comparing them with v_free_min, so the
 * initializers here are presumably byte counts -- TODO confirm.
 */
int vm_page_free_min_min = 16 * 1024;
int vm_page_free_min_max = 256 * 1024;
/*
 * Only the address of vm_pages_needed is used in this file: it is the
 * sleep channel on which vm_pageout() waits and which vm_wait() wakes.
 */
int vm_pages_needed; /* Event on which pageout daemon sleeps */
/*
 * If left at 0, vm_pageout() sets this to a third of cnt.v_free_count.
 */
int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */
#ifdef CLUSTERED_PAGEOUT
/*
 * MAXPOCLUSTER sizes the on-stack page list in vm_pageout_cluster() and
 * bounds how many pages it gathers.  doclustered_pageout is not
 * referenced by the code visible in this file.
 */
#define MAXPOCLUSTER (MAXPHYS/NBPG) /* XXX */
int doclustered_pageout = 1;
#endif
/*
 * vm_wait:
 *
 *	Wake the pageout daemon (channel &vm_pages_needed) and then sleep
 *	on &cnt.v_free_count until more memory is announced.  "msg" is
 *	passed through to thread_sleep_msg() unchanged.
 *
 *	vm_pages_needed_lock is taken here and handed to
 *	thread_sleep_msg(); nothing in this function unlocks it, so the
 *	sleep primitive is presumably responsible for dropping it.
 */
void
vm_wait(msg)
	char *msg;
{
	int sleep_ticks;

	if (curproc != pageout_daemon) {
		/* Ordinary caller: pass 0 as the timeout argument. */
		sleep_ticks = 0;
	} else {
		/*
		 * The daemon itself is out of memory.  In this file the
		 * daemon is the only one that wakes &cnt.v_free_count,
		 * so an unbounded sleep here could never end.  Complain
		 * and bound the sleep to hz/8 ticks instead; if paging
		 * operations are still pending, pages may show up in
		 * the meantime.  Returning an error is not an option
		 * because waiting malloc callers generally do not
		 * check for failure.
		 */
		printf("pageout daemon has stalled\n");
		sleep_ticks = hz >> 3;
	}

	simple_lock(&vm_pages_needed_lock);
	thread_wakeup(&vm_pages_needed);
	thread_sleep_msg(&cnt.v_free_count, &vm_pages_needed_lock, FALSE,
	    msg, sleep_ticks);
}
/*
 * Sample cnt.v_free_count at splimp() with vm_page_queue_free_lock held
 * and return the value read.
 */
static int
vm_pageout_free_count()
{
	int ipl, nfree;

	ipl = splimp();
	simple_lock(&vm_page_queue_free_lock);
	nfree = cnt.v_free_count;
	simple_unlock(&vm_page_queue_free_lock);
	splx(ipl);

	return (nfree);
}

/*
 * vm_pageout_scan:
 *
 *	One pass of the pageout daemon's real work.  Walks the inactive
 *	queue until the free count reaches cnt.v_free_target or the queue
 *	runs out: referenced pages are reactivated, clean pages are freed,
 *	and dirty pages still in the laundry are handed to the pager.
 *	Afterwards pages are moved from the active to the inactive queue
 *	to make up any shortfall against cnt.v_inactive_target.
 */
void
vm_pageout_scan()
{
	vm_page_t pg, next_pg;
	vm_object_t obj;
	int nfree;
	int nfreed;
	int shortage;

	cnt.v_rev++;

	nfree = vm_pageout_free_count();

#ifndef __SWAP_BROKEN /* XXX */
	/*
	 * Short of the free target: swap out threads first, then
	 * make sure the pmap system is up to date so the inactive
	 * queue can be scanned.
	 */
	if (nfree < cnt.v_free_target) {
		swapout_threads();
		pmap_update();
	}
#endif /* XXX */

	/*
	 * Hold the page queue lock: residency may change a lot below.
	 */
	vm_page_lock_queues();

	/*
	 * Pass over the inactive queue.  The free count is re-sampled
	 * before every page so the scan stops as soon as the target is
	 * met.
	 */
	nfreed = 0;
	for (pg = vm_page_queue_inactive.tqh_first; pg != NULL;
	    pg = next_pg) {
		nfree = vm_pageout_free_count();
		if (nfree >= cnt.v_free_target)
			break;

		cnt.v_scan++;
		next_pg = pg->pageq.tqe_next;

		/*
		 * Referenced since it went inactive: back to the
		 * active queue.
		 */
		if (pmap_is_referenced(VM_PAGE_TO_PHYS(pg))) {
			vm_page_activate(pg);
			cnt.v_reactivated++;
			continue;
		}

		/*
		 * Clean page: free it, provided the owning object can
		 * be locked without blocking.  Otherwise leave it for
		 * a later pass.
		 */
		if (pg->flags & PG_CLEAN) {
			obj = pg->object;
			if (!vm_object_lock_try(obj))
				continue;
			pmap_page_protect(VM_PAGE_TO_PHYS(pg), VM_PROT_NONE);
			vm_page_free(pg);
			nfreed++;
			cnt.v_dfree++;
			vm_object_unlock(obj);
			continue;
		}

		/*
		 * Dirty but no longer in the laundry: it is already
		 * being washed, nothing to do.
		 */
		if (!(pg->flags & PG_LAUNDRY))
			continue;

		/*
		 * Dirty and still in the laundry: start cleaning it,
		 * again only if the object lock is available right now.
		 */
		obj = pg->object;
		if (!vm_object_lock_try(obj))
			continue;
#ifdef CLUSTERED_PAGEOUT
		if (obj->pager &&
		    vm_pager_cancluster(obj->pager, PG_CLUSTERPUT))
			vm_pageout_cluster(pg, obj);
		else
#endif
			vm_pageout_page(pg, obj);
		thread_wakeup(obj);
		vm_object_unlock(obj);

		/*
		 * The pager may have blocked with the queues unlocked,
		 * so the page remembered as "next" may have left the
		 * inactive queue.  If so, restart from the head.
		 */
		if (next_pg && (next_pg->flags & PG_INACTIVE) == 0)
			next_pg = vm_page_queue_inactive.tqh_first;
	}

	/*
	 * Work out how many pages the inactive queue is short.  When
	 * there is no shortage but the scan freed nothing either,
	 * still deactivate one page so that some progress is made.
	 */
	shortage = cnt.v_inactive_target - cnt.v_inactive_count;
	if (shortage <= 0 && nfreed == 0)
		shortage = 1;

	/*
	 * Deactivate pages from the head of the active queue until the
	 * shortage is covered or the active queue is empty.
	 */
	for (; shortage > 0; shortage--) {
		pg = vm_page_queue_active.tqh_first;
		if (pg == NULL)
			break;
		vm_page_deactivate(pg);
	}

	vm_page_unlock_queues();
}
/*
 * vm_pageout_page:
 *
 *	Push a single page "m" of "object" out through the object's
 *	pager, allocating a default pager if the object has none.
 *
 *	Called with the object and the page queues locked.  Both locks
 *	are dropped around the pager call and are held again on return.
 *
 *	Outcome by pager status:
 *	  OK, PEND	wake page consumers, count the pageout, take the
 *			page out of the laundry
 *	  BAD		pretend it worked: mark the page clean
 *	  AGAIN		pause briefly; page stays in the laundry
 *	  FAIL, ERROR	reactivate the page
 *	Unless the operation is still pending, the page is un-busied and
 *	the object's paging-in-progress indication is ended.
 */
void
vm_pageout_page(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_pager_t pgr;
	int status;

	/*
	 * Remove all mappings and mark the page busy so that faults on
	 * it block while it is being written.
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;

	/*
	 * The page queues must be unlocked before trying to collapse
	 * the object, which is done before a pager is made for it.
	 * Paging-in-progress is then raised to keep the object from
	 * disappearing during the pageout; that also guarantees this
	 * page stays on the inactive queue (other inactive pages may
	 * still move).
	 */
	vm_page_unlock_queues();
	if (object->pager == NULL)
		vm_object_collapse(object);
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 * Page consumers used to be woken at this point "in case the
	 * following operations block".  That livelocks when the pageout
	 * fails, which is common for NFS paging, so the wakeup now
	 * happens only after success (below).
	 */

	/*
	 * Use the object's pager, or try to give it a default one.  If
	 * no pager can be had there is nowhere to put the page right
	 * now; treat that as a pager failure.
	 */
	pgr = object->pager;
	if (pgr == NULL) {
		pgr = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
		    VM_PROT_ALL, (vm_offset_t)0);
		if (pgr != NULL)
			vm_object_setpager(object, pgr, 0, FALSE);
	}
	if (pgr != NULL)
		status = vm_pager_put(pgr, m, FALSE);
	else
		status = VM_PAGER_FAIL;

	vm_object_lock(object);
	vm_page_lock_queues();

	switch (status) {
	case VM_PAGER_OK:
	case VM_PAGER_PEND:
		/* hmm, don't wakeup if memory is _very_ low? */
		thread_wakeup(&cnt.v_free_count);
		cnt.v_pageouts++;
		cnt.v_pgpgout++;
		m->flags &= ~PG_LAUNDRY;
		break;

	case VM_PAGER_BAD:
		/*
		 * The page lies outside the object's range.  For now
		 * the modifications are effectively thrown away by
		 * acting as though the write succeeded.
		 *
		 * XXX dubious, what should we do?
		 */
		m->flags &= ~PG_LAUNDRY;
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		break;

	case VM_PAGER_AGAIN:
		/*
		 * Taken to mean a resource shortage: drop both locks,
		 * pause for up to hz/8 ticks, and re-lock.  The page is
		 * left as it was so it can be retried.
		 * XXX could get stuck here.
		 */
		vm_page_unlock_queues();
		vm_object_unlock(object);
		(void) tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
		    "pageout", hz>>3);
		vm_object_lock(object);
		vm_page_lock_queues();
		break;

	case VM_PAGER_FAIL:
	case VM_PAGER_ERROR:
		/*
		 * Could not be written.  Reactivate the page so it does
		 * not clog the inactive list; it will be tried again
		 * later.
		 */
		vm_page_activate(m);
		cnt.v_reactivated++;
		break;
	}

	pmap_clear_reference(VM_PAGE_TO_PHYS(m));

	/*
	 * While the operation is still in flight the page stays busy
	 * (blocking other accesses) and paging-in-progress stays set
	 * (preventing an object collapse).
	 */
	if (status == VM_PAGER_PEND)
		return;

	m->flags &= ~PG_BUSY;
	PAGE_WAKEUP(m);
	vm_object_paging_end(object);
}
#ifdef CLUSTERED_PAGEOUT
/*
 * A page may join a pageout cluster when it is on the inactive queue,
 * is not clean, is still in the laundry, and the pmap layer does not
 * report it as referenced.
 */
#define PAGEOUTABLE(p) \
	((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
	  (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))

/*
 * vm_pageout_cluster:
 *
 *	Attempt to pageout as many contiguous (to ``m'') dirty pages as
 *	possible from ``object''.  Using information returned from the
 *	pager, we assemble a sorted list of contiguous dirty pages and
 *	feed them to the pager in one chunk.
 *
 *	Called with paging queues and object locked; both are dropped
 *	around the pager call and held again on return.  The object must
 *	already have a pager.
 *
 *	Page consumers are woken, and cnt.v_pageouts is bumped, only
 *	after the pager reports OK or PEND, matching vm_pageout_page().
 *	Waking them before each attempt lets them re-enter vm_wait(),
 *	which wakes &vm_pages_needed -- the very channel the AGAIN
 *	back-off below sleeps on -- so the retry loop would spin.
 *	When the cluster degenerates to a single page the work (and the
 *	accounting) is left entirely to vm_pageout_page().
 */
void
vm_pageout_cluster(m, object)
	vm_page_t m;
	vm_object_t object;
{
	vm_offset_t offset, loff, hoff;
	vm_page_t plist[MAXPOCLUSTER], *plistp, p;
	int postatus, ix, count;

	/*
	 * Determine the range of pages that can be part of a cluster
	 * for this object/offset.  If it is only our single page, just
	 * do it normally.
	 */
	vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
	if (hoff - loff == PAGE_SIZE) {
		vm_pageout_page(m, object);
		return;
	}

	plistp = plist;

	/*
	 * Target page is always part of the cluster.  Slots in plist
	 * are indexed by page distance from loff.
	 */
	pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
	m->flags |= PG_BUSY;
	plistp[atop(m->offset - loff)] = m;
	count = 1;

	/*
	 * Backup from the given page til we find one not fulfilling
	 * the pageout criteria or we hit the lower bound for the
	 * cluster.  For each page determined to be part of the
	 * cluster, unmap it and busy it out so it won't change.
	 */
	ix = atop(m->offset - loff);
	offset = m->offset;
	while (offset > loff && count < MAXPOCLUSTER-1) {
		p = vm_page_lookup(object, offset - PAGE_SIZE);
		if (p == NULL || !PAGEOUTABLE(p))
			break;
		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
		p->flags |= PG_BUSY;
		plistp[--ix] = p;
		offset -= PAGE_SIZE;
		count++;
	}
	/* Rebase the list and loff on the first page actually gathered. */
	plistp += atop(offset - loff);
	loff = offset;

	/*
	 * Now do the same moving forward from the target.
	 */
	ix = atop(m->offset - loff) + 1;
	offset = m->offset + PAGE_SIZE;
	while (offset < hoff && count < MAXPOCLUSTER) {
		p = vm_page_lookup(object, offset);
		if (p == NULL || !PAGEOUTABLE(p))
			break;
		pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
		p->flags |= PG_BUSY;
		plistp[ix++] = p;
		offset += PAGE_SIZE;
		count++;
	}
	hoff = offset;

	/*
	 * Pageout the pages.  Unlock everything around the pager call
	 * in case it blocks.
	 */
	vm_page_unlock_queues();
	vm_object_paging_begin(object);
	vm_object_unlock(object);
again:
	postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
	/*
	 * XXX rethink this
	 */
	if (postatus == VM_PAGER_AGAIN) {
		(void) tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH,
		    "pageout", hz);
		goto again;
	} else if (postatus == VM_PAGER_BAD)
		panic("vm_pageout_cluster: VM_PAGER_BAD");
	vm_object_lock(object);
	vm_page_lock_queues();

	/*
	 * Only a pageout that succeeded (or is under way) is counted,
	 * and only then are page consumers told to look again.
	 */
	if (postatus == VM_PAGER_OK || postatus == VM_PAGER_PEND) {
		thread_wakeup(&cnt.v_free_count);
		cnt.v_pageouts++;
	}

	/*
	 * Loop through the affected pages, reflecting the outcome of
	 * the operation.
	 */
	for (ix = 0; ix < count; ix++) {
		p = *plistp++;
		switch (postatus) {
		case VM_PAGER_OK:
		case VM_PAGER_PEND:
			cnt.v_pgpgout++;
			p->flags &= ~PG_LAUNDRY;
			break;
		case VM_PAGER_FAIL:
		case VM_PAGER_ERROR:
			/*
			 * Pageout failed, reactivate the target page so it
			 * doesn't clog the inactive list.  Other pages are
			 * left as they are.
			 */
			if (p == m) {
				vm_page_activate(p);
				cnt.v_reactivated++;
			}
			break;
		}
		pmap_clear_reference(VM_PAGE_TO_PHYS(p));
		/*
		 * If the operation is still going, leave the page busy
		 * to block all other accesses.
		 */
		if (postatus != VM_PAGER_PEND) {
			p->flags &= ~PG_BUSY;
			PAGE_WAKEUP(p);
		}
	}

	/*
	 * If the operation is still going, leave the paging in progress
	 * indicator set so that we don't attempt an object collapse.
	 */
	if (postatus != VM_PAGER_PEND)
		vm_object_paging_end(object);
}
#endif
/*
 * vm_pageout:
 *
 *	Body of the pageout daemon.  Records the current process as
 *	pageout_daemon, fills in any paging parameters that are still
 *	zero, and then loops forever: sleep on &vm_pages_needed, scan if
 *	memory looks short, sync the pagers, and wake whoever is waiting
 *	on &cnt.v_free_count.  Never returns.
 */
void
vm_pageout()
{
	pageout_daemon = curproc;
	(void) spl0();

	/*
	 * Free-page minimum: start from the default and clamp it
	 * between vm_page_free_min_min and vm_page_free_min_max, both
	 * of which are first divided by the page size.
	 */
	if (cnt.v_free_min == 0) {
		cnt.v_free_min = VM_PAGE_FREE_MIN;
		vm_page_free_min_min /= cnt.v_page_size;
		vm_page_free_min_max /= cnt.v_page_size;
		if (cnt.v_free_min < vm_page_free_min_min)
			cnt.v_free_min = vm_page_free_min_min;
		if (cnt.v_free_min > vm_page_free_min_max)
			cnt.v_free_min = vm_page_free_min_max;
	}

	/*
	 * Free-page target: default if unset, and always strictly
	 * above the minimum.
	 */
	if (cnt.v_free_target == 0)
		cnt.v_free_target = VM_PAGE_FREE_TARGET;
	if (cnt.v_free_target <= cnt.v_free_min)
		cnt.v_free_target = cnt.v_free_min + 1;

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	/*
	 * Main loop.  The lock is taken before each sleep: once here
	 * for the first iteration, and again at the bottom of the loop
	 * for every later one.
	 */
	simple_lock(&vm_pages_needed_lock);
	for (;;) {
		thread_sleep_msg(&vm_pages_needed, &vm_pages_needed_lock,
		    FALSE, "paged", 0);

		/*
		 * Inactive target for this round: a third of the
		 * active + inactive pages, but always above the free
		 * target.  A reasonably sized inactive list is needed
		 * to better simulate LRU behavior.
		 */
		cnt.v_inactive_target =
		    (cnt.v_active_count + cnt.v_inactive_count) / 3;
		if (cnt.v_inactive_target <= cnt.v_free_target)
			cnt.v_inactive_target = cnt.v_free_target + 1;

		/*
		 * Scan only when it is likely to accomplish something;
		 * the wakeup may have come from a pager that merely
		 * wants async pageouts cleaned up.
		 */
		if (cnt.v_free_count < cnt.v_free_target ||
		    cnt.v_inactive_count < cnt.v_inactive_target)
			vm_pageout_scan();

		vm_pager_sync();

		simple_lock(&vm_pages_needed_lock);
		thread_wakeup(&cnt.v_free_count);
	}
}