NetBSD/sys/vm/swap_pager.c

1280 lines
32 KiB
C

/* $NetBSD: swap_pager.c,v 1.30 1997/01/03 18:03:17 mrg Exp $ */
/*
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
*
* @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
*/
/*
* Quick hack to page to dedicated partition(s).
* TODO:
* Add multiprocessor locks
* Deal with async writes in a better fashion
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/map.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#define NSWSIZES 16 /* size of swtab */
#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
#ifndef NPENDINGIO
#define NPENDINGIO 64 /* max # of pending cleans */
#endif
#ifdef DEBUG
int swpagerdebug = 0x100;
#define SDB_FOLLOW 0x001
#define SDB_INIT 0x002
#define SDB_ALLOC 0x004
#define SDB_IO 0x008
#define SDB_WRITE 0x010
#define SDB_FAIL 0x020
#define SDB_ALLOCBLK 0x040
#define SDB_FULL 0x080
#define SDB_ANOM 0x100
#define SDB_ANOMPANIC 0x200
#define SDB_CLUSTER 0x400
#define SDB_PARANOIA 0x800
#endif
TAILQ_HEAD(swpclean, swpagerclean);
struct swpagerclean {
TAILQ_ENTRY(swpagerclean) spc_list;
int spc_flags;
struct buf *spc_bp;
sw_pager_t spc_swp;
vm_offset_t spc_kva;
vm_page_t spc_m;
int spc_npages;
} swcleanlist[NPENDINGIO];
typedef struct swpagerclean *swp_clean_t;
/* spc_flags values */
#define SPC_FREE 0x00
#define SPC_BUSY 0x01
#define SPC_DONE 0x02
#define SPC_ERROR 0x04
struct swtab {
vm_size_t st_osize; /* size of object (bytes) */
int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
u_long st_inuse; /* number in this range in use */
u_long st_usecnt; /* total used of this size */
#endif
} swtab[NSWSIZES+1];
#ifdef DEBUG
int swap_pager_poip; /* pageouts in progress */
int swap_pager_piip; /* pageins in progress */
#endif
int swap_pager_maxcluster; /* maximum cluster size */
int swap_pager_npendingio; /* number of pager clean structs */
struct swpclean swap_pager_inuse; /* list of pending page cleans */
struct swpclean swap_pager_free; /* list of free pager clean structs */
struct pagerlst swap_pager_list; /* list of "named" anon regions */
static void swap_pager_init __P((void));
static vm_pager_t swap_pager_alloc
__P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static void swap_pager_clean __P((int));
#ifdef DEBUG
static void swap_pager_clean_check __P((vm_page_t *, int, int));
#endif
static void swap_pager_cluster
__P((vm_pager_t, vm_offset_t,
vm_offset_t *, vm_offset_t *));
static void swap_pager_dealloc __P((vm_pager_t));
static int swap_pager_remove
__P((vm_pager_t, vm_offset_t, vm_offset_t));
static vm_offset_t swap_pager_next __P((vm_pager_t, vm_offset_t));
static int swap_pager_count __P((vm_pager_t));
static int swap_pager_getpage
__P((vm_pager_t, vm_page_t *, int, boolean_t));
static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t));
static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
static void swap_pager_iodone __P((struct buf *));
static int swap_pager_putpage
__P((vm_pager_t, vm_page_t *, int, boolean_t));
static int count_bits __P((u_int));
struct pagerops swappagerops = {
swap_pager_init,
swap_pager_alloc,
swap_pager_dealloc,
swap_pager_getpage,
swap_pager_putpage,
swap_pager_haspage,
swap_pager_cluster,
swap_pager_remove,
swap_pager_next,
swap_pager_count
};
static void
swap_pager_init()
{
register swp_clean_t spc;
register int i, bsize;
extern int dmmin, dmmax;
int maxbsize;
#ifdef DEBUG
if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
printf("swpg_init()\n");
#endif
dfltpagerops = &swappagerops;
TAILQ_INIT(&swap_pager_list);
/*
* Allocate async IO structures.
*
* XXX it would be nice if we could do this dynamically based on
* the value of nswbuf (since we are ultimately limited by that)
* but neither nswbuf or malloc has been initialized yet. So the
* structs are statically allocated above.
*/
swap_pager_npendingio = NPENDINGIO;
/*
* Initialize clean lists
*/
TAILQ_INIT(&swap_pager_inuse);
TAILQ_INIT(&swap_pager_free);
for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) {
TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
spc->spc_flags = SPC_FREE;
}
/*
* Calculate the swap allocation constants.
*/
if (dmmin == 0) {
dmmin = DMMIN;
if (dmmin < CLBYTES/DEV_BSIZE)
dmmin = CLBYTES/DEV_BSIZE;
}
if (dmmax == 0)
dmmax = DMMAX;
/*
* Fill in our table of object size vs. allocation size
*/
bsize = btodb(PAGE_SIZE);
if (bsize < dmmin)
bsize = dmmin;
maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
if (maxbsize > dmmax)
maxbsize = dmmax;
for (i = 0; i < NSWSIZES; i++) {
swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
swtab[i].st_bsize = bsize;
if (bsize <= btodb(MAXPHYS))
swap_pager_maxcluster = dbtob(bsize);
#ifdef DEBUG
if (swpagerdebug & SDB_INIT)
printf("swpg_init: ix %d, size %lx, bsize %x\n",
i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
if (bsize >= maxbsize)
break;
bsize *= 2;
}
swtab[i].st_osize = 0;
swtab[i].st_bsize = bsize;
}
/*
* Allocate a pager structure and associated resources.
* Note that if we are called from the pageout daemon (handle == NULL)
* we should not wait for memory as it could resulting in deadlock.
*/
static vm_pager_t
swap_pager_alloc(handle, size, prot, foff)
caddr_t handle;
register vm_size_t size;
vm_prot_t prot;
vm_offset_t foff;
{
register vm_pager_t pager;
register sw_pager_t swp;
struct swtab *swt;
int waitok;
#ifdef DEBUG
if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
printf("swpg_alloc(%p, %lx, %x)\n", handle, size, prot);
#endif
/*
* If this is a "named" anonymous region, look it up and
* return the appropriate pager if it exists.
*/
if (handle) {
pager = vm_pager_lookup(&swap_pager_list, handle);
if (pager != NULL) {
/*
* Use vm_object_lookup to gain a reference
* to the object and also to remove from the
* object cache.
*/
if (vm_object_lookup(pager) == NULL)
panic("swap_pager_alloc: bad object");
return(pager);
}
}
/*
* Pager doesn't exist, allocate swap management resources
* and initialize.
*/
waitok = handle ? M_WAITOK : M_NOWAIT;
pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
if (pager == NULL)
return(NULL);
swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
if (swp == NULL) {
#ifdef DEBUG
if (swpagerdebug & SDB_FAIL)
printf("swpg_alloc: swpager malloc failed\n");
#endif
free((caddr_t)pager, M_VMPAGER);
return(NULL);
}
size = round_page(size);
for (swt = swtab; swt->st_osize; swt++)
if (size <= swt->st_osize)
break;
#ifdef DEBUG
swt->st_inuse++;
swt->st_usecnt++;
#endif
swp->sw_osize = size;
swp->sw_bsize = swt->st_bsize;
swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
swp->sw_blocks = (sw_blk_t)
malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
M_VMPGDATA, M_NOWAIT);
if (swp->sw_blocks == NULL) {
free((caddr_t)swp, M_VMPGDATA);
free((caddr_t)pager, M_VMPAGER);
#ifdef DEBUG
if (swpagerdebug & SDB_FAIL)
printf("swpg_alloc: sw_blocks malloc failed\n");
swt->st_inuse--;
swt->st_usecnt--;
#endif
return(FALSE);
}
bzero((caddr_t)swp->sw_blocks,
swp->sw_nblocks * sizeof(*swp->sw_blocks));
swp->sw_poip = swp->sw_cnt = 0;
if (handle) {
vm_object_t object;
swp->sw_flags = SW_NAMED;
TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
/*
* Consistant with other pagers: return with object
* referenced. Can't do this with handle == NULL
* since it might be the pageout daemon calling.
*/
object = vm_object_allocate(size);
vm_object_enter(object, pager);
vm_object_setpager(object, pager, 0, FALSE);
} else {
swp->sw_flags = 0;
pager->pg_list.tqe_next = NULL;
pager->pg_list.tqe_prev = NULL;
}
pager->pg_handle = handle;
pager->pg_ops = &swappagerops;
pager->pg_type = PG_SWAP;
pager->pg_flags = PG_CLUSTERPUT;
pager->pg_data = swp;
#ifdef DEBUG
if (swpagerdebug & SDB_ALLOC)
printf("swpg_alloc: pg_data %p, %x of %x at %p\n",
swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
#endif
return(pager);
}
static void
swap_pager_dealloc(pager)
vm_pager_t pager;
{
register int i;
register sw_blk_t bp;
register sw_pager_t swp;
int s;
#ifdef DEBUG
struct swtab *swt;
/* save panic time state */
if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
return;
if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
printf("swpg_dealloc(%p)\n", pager);
#endif
/*
* Remove from list right away so lookups will fail if we
* block for pageout completion.
*/
swp = (sw_pager_t) pager->pg_data;
if (swp->sw_flags & SW_NAMED) {
TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
swp->sw_flags &= ~SW_NAMED;
}
#ifdef DEBUG
for (swt = swtab; swt->st_osize; swt++)
if (swp->sw_osize <= swt->st_osize)
break;
swt->st_inuse--;
#endif
/*
* Wait for all pageouts to finish and remove
* all entries from cleaning list.
*/
s = splbio();
while (swp->sw_poip) {
swp->sw_flags |= SW_WANTED;
(void) tsleep(swp, PVM, "swpgdealloc", 0);
}
splx(s);
swap_pager_clean(B_WRITE);
/*
* Free left over swap blocks
*/
for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
if (bp->swb_block) {
#ifdef DEBUG
if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
printf("swpg_dealloc: blk %x\n",
bp->swb_block);
#endif
rmfree(swapmap, swp->sw_bsize, bp->swb_block);
}
/*
* Free swap management resources
*/
free((caddr_t)swp->sw_blocks, M_VMPGDATA);
free((caddr_t)swp, M_VMPGDATA);
free((caddr_t)pager, M_VMPAGER);
}
static int
swap_pager_getpage(pager, mlist, npages, sync)
vm_pager_t pager;
vm_page_t *mlist;
int npages;
boolean_t sync;
{
register int rv;
#ifdef DIAGNOSTIC
vm_page_t m;
int i;
#endif
#ifdef DEBUG
if (swpagerdebug & SDB_FOLLOW)
printf("swpg_getpage(%p, %p, %x, %x)\n",
pager, mlist, npages, sync);
#endif
#ifdef DIAGNOSTIC
for (i = 0; i < npages; i++) {
m = mlist[i];
if (m->flags & PG_FAULTING)
panic("swap_pager_getpage: page is already faulting");
m->flags |= PG_FAULTING;
}
#endif
rv = swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, B_READ);
#ifdef DIAGNOSTIC
for (i = 0; i < npages; i++) {
m = mlist[i];
m->flags &= ~PG_FAULTING;
}
#endif
return(rv);
}
static int
swap_pager_putpage(pager, mlist, npages, sync)
vm_pager_t pager;
vm_page_t *mlist;
int npages;
boolean_t sync;
{
int flags;
#ifdef DEBUG
if (swpagerdebug & SDB_FOLLOW)
printf("swpg_putpage(%p, %p, %x, %x)\n",
pager, mlist, npages, sync);
#endif
if (pager == NULL) {
swap_pager_clean(B_WRITE);
return (VM_PAGER_OK); /* ??? */
}
flags = B_WRITE;
if (!sync)
flags |= B_ASYNC;
return(swap_pager_io((sw_pager_t)pager->pg_data,
mlist, npages, flags));
}
static boolean_t
swap_pager_haspage(pager, offset)
vm_pager_t pager;
vm_offset_t offset;
{
register sw_pager_t swp;
register sw_blk_t swb;
int ix;
#ifdef DEBUG
if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
printf("swpg_haspage(%p, %lx) ", pager, offset);
#endif
swp = (sw_pager_t) pager->pg_data;
ix = offset / dbtob(swp->sw_bsize);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
printf("swpg_haspage: %p bad offset %lx, ix %x\n",
swp->sw_blocks, offset, ix);
#endif
return(FALSE);
}
swb = &swp->sw_blocks[ix];
if (swb->swb_block)
ix = atop(offset % dbtob(swp->sw_bsize));
#ifdef DEBUG
if (swpagerdebug & SDB_ALLOCBLK)
printf("%p blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
printf("-> %c\n",
"FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
#endif
if (swb->swb_block && (swb->swb_mask & (1 << ix)))
return(TRUE);
return(FALSE);
}
static void
swap_pager_cluster(pager, offset, loffset, hoffset)
vm_pager_t pager;
vm_offset_t offset;
vm_offset_t *loffset;
vm_offset_t *hoffset;
{
sw_pager_t swp;
register int bsize;
vm_offset_t loff, hoff;
#ifdef DEBUG
if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
printf("swpg_cluster(%p, %lx) ", pager, offset);
#endif
swp = (sw_pager_t) pager->pg_data;
bsize = dbtob(swp->sw_bsize);
if (bsize > swap_pager_maxcluster)
bsize = swap_pager_maxcluster;
loff = offset - (offset % bsize);
if (loff >= swp->sw_osize)
panic("swap_pager_cluster: bad offset");
hoff = loff + bsize;
if (hoff > swp->sw_osize)
hoff = swp->sw_osize;
*loffset = loff;
*hoffset = hoff;
#ifdef DEBUG
if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
printf("returns [%lx-%lx]\n", loff, hoff);
#endif
}
/*
* Scaled down version of swap().
* Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
* BOGUS: lower level IO routines expect a KVA so we have to map our
* provided physical page into the KVA to keep them happy.
*/
static int
swap_pager_io(swp, mlist, npages, flags)
register sw_pager_t swp;
vm_page_t *mlist;
int npages;
int flags;
{
register struct buf *bp;
register sw_blk_t swb;
register int s;
int ix;
u_int mask;
boolean_t rv;
vm_offset_t kva, off;
swp_clean_t spc;
vm_page_t m;
#ifdef DEBUG
/* save panic time state */
if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
return (VM_PAGER_FAIL); /* XXX: correct return? */
if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
printf("swpg_io(%p, %p, %x, %x)\n", swp, mlist, npages, flags);
if (flags & B_READ) {
if (flags & B_ASYNC)
panic("swap_pager_io: cannot do ASYNC reads");
if (npages != 1)
panic("swap_pager_io: cannot do clustered reads");
}
#endif
/*
* First determine if the page exists in the pager if this is
* a sync read. This quickly handles cases where we are
* following shadow chains looking for the top level object
* with the page.
*/
m = *mlist;
off = m->offset + m->object->paging_offset;
ix = off / dbtob(swp->sw_bsize);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
printf("swap_pager_io: no swap block on write\n");
return(VM_PAGER_BAD);
}
#endif
return(VM_PAGER_FAIL);
}
swb = &swp->sw_blocks[ix];
off = off % dbtob(swp->sw_bsize);
if ((flags & B_READ) &&
(swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
return(VM_PAGER_FAIL);
/*
* For reads (pageins) and synchronous writes, we clean up
* all completed async pageouts.
*/
if ((flags & B_ASYNC) == 0) {
s = splbio();
swap_pager_clean(flags&B_READ);
#ifdef DEBUG
if (swpagerdebug & SDB_PARANOIA)
swap_pager_clean_check(mlist, npages, flags&B_READ);
#endif
splx(s);
}
/*
* For async writes (pageouts), we cleanup completed pageouts so
* that all available resources are freed. Also tells us if this
* page is already being cleaned. If it is, or no resources
* are available, we try again later.
*/
else {
swap_pager_clean(B_WRITE);
#ifdef DEBUG
if (swpagerdebug & SDB_PARANOIA)
swap_pager_clean_check(mlist, npages, B_WRITE);
#endif
if (swap_pager_free.tqh_first == NULL) {
#ifdef DEBUG
if (swpagerdebug & SDB_FAIL)
printf("%s: no available io headers\n",
"swap_pager_io");
#endif
return(VM_PAGER_AGAIN);
}
}
/*
* Allocate a swap block if necessary.
*/
if (swb->swb_block == 0) {
swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
if (swb->swb_block == 0) {
#ifdef DEBUG
if (swpagerdebug & SDB_FAIL)
printf("swpg_io: rmalloc of %x failed\n",
swp->sw_bsize);
#endif
/*
* XXX this is technically a resource shortage that
* should return AGAIN, but the situation isn't likely
* to be remedied just by delaying a little while and
* trying again (the pageout daemon's current response
* to AGAIN) so we just return FAIL.
*/
return(VM_PAGER_FAIL);
}
#ifdef DEBUG
if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
printf("swpg_io: %p alloc blk %x at ix %x\n",
swp->sw_blocks, swb->swb_block, ix);
#endif
}
/*
* Allocate a kernel virtual address and initialize so that PTE
* is available for lower level IO drivers.
*/
kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
if (kva == NULL) {
#ifdef DEBUG
if (swpagerdebug & SDB_FAIL)
printf("%s: no KVA space to map pages\n",
"swap_pager_io");
#endif
return(VM_PAGER_AGAIN);
}
/*
* Get a swap buffer header and initialize it.
*/
s = splbio();
while (bswlist.b_actf == NULL) {
#ifdef DEBUG
if (swpagerdebug & SDB_IO) /* XXX what should this be? */
printf("swap_pager_io: wait on swbuf for %p (%d)\n",
m, flags);
#endif
bswlist.b_flags |= B_WANTED;
tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
}
bp = bswlist.b_actf;
bswlist.b_actf = bp->b_actf;
splx(s);
bp->b_flags = B_BUSY | (flags & B_READ);
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
bp->b_data = (caddr_t)kva;
bp->b_blkno = swb->swb_block + btodb(off);
VHOLD(swapdev_vp);
bp->b_vp = swapdev_vp;
if (swapdev_vp->v_type == VBLK)
bp->b_dev = swapdev_vp->v_rdev;
bp->b_bcount = npages * PAGE_SIZE;
/*
* For writes we set up additional buffer fields, record a pageout
* in progress and mark that these swap blocks are now allocated.
*/
if ((bp->b_flags & B_READ) == 0) {
bp->b_dirtyoff = 0;
bp->b_dirtyend = npages * PAGE_SIZE;
swapdev_vp->v_numoutput++;
s = splbio();
swp->sw_poip++;
splx(s);
mask = (~(~0 << npages)) << atop(off);
#ifdef DEBUG
swap_pager_poip++;
if (swpagerdebug & SDB_WRITE)
printf("swpg_io: write: bp=%p swp=%p poip=%d\n",
bp, swp, swp->sw_poip);
if ((swpagerdebug & SDB_ALLOCBLK) &&
(swb->swb_mask & mask) != mask)
printf("swpg_io: %p write %d pages at %x+%lx\n",
swp->sw_blocks, npages, swb->swb_block,
atop(off));
if (swpagerdebug & SDB_CLUSTER)
printf("swpg_io: off=%lx, npg=%x, mask=%x, bmask=%x\n",
off, npages, mask, swb->swb_mask);
#endif
swp->sw_cnt += count_bits(mask & ~swb->swb_mask);
swb->swb_mask |= mask;
}
/*
* If this is an async write we set up still more buffer fields
* and place a "cleaning" entry on the inuse queue.
*/
if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
if (swap_pager_free.tqh_first == NULL)
panic("swpg_io: lost spc");
#endif
spc = swap_pager_free.tqh_first;
TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
#ifdef DEBUG
if (spc->spc_flags != SPC_FREE)
panic("swpg_io: bad free spc");
#endif
spc->spc_flags = SPC_BUSY;
spc->spc_bp = bp;
spc->spc_swp = swp;
spc->spc_kva = kva;
/*
* Record the first page. This allows swap_pager_clean
* to efficiently handle the common case of a single page.
* For clusters, it allows us to locate the object easily
* and we then reconstruct the rest of the mlist from spc_kva.
*/
spc->spc_m = m;
spc->spc_npages = npages;
bp->b_flags |= B_CALL;
bp->b_iodone = swap_pager_iodone;
s = splbio();
TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
splx(s);
}
/*
* Finally, start the IO operation.
* If it is async we are all done, otherwise we must wait for
* completion and cleanup afterwards.
*/
#ifdef DEBUG
if (swpagerdebug & SDB_IO)
printf("swpg_io: IO start: bp %p, db %lx, va %lx, pa %lx\n",
bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
VOP_STRATEGY(bp);
if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
if (swpagerdebug & SDB_IO)
printf("swpg_io: IO started: bp %p\n", bp);
#endif
return(VM_PAGER_PEND);
}
s = splbio();
#ifdef DEBUG
if (flags & B_READ)
swap_pager_piip++;
else
swap_pager_poip++;
#endif
while ((bp->b_flags & B_DONE) == 0)
(void) tsleep(bp, PVM, "swpgio", 0);
if ((flags & B_READ) == 0)
--swp->sw_poip;
#ifdef DEBUG
if (flags & B_READ)
--swap_pager_piip;
else
--swap_pager_poip;
#endif
rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
bp->b_actf = bswlist.b_actf;
bswlist.b_actf = bp;
if (bp->b_vp)
brelvp(bp);
if (bswlist.b_flags & B_WANTED) {
bswlist.b_flags &= ~B_WANTED;
wakeup(&bswlist);
}
if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
m->flags |= PG_CLEAN;
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}
splx(s);
#ifdef DEBUG
if (swpagerdebug & SDB_IO)
printf("swpg_io: IO done: bp %p, rv %d\n", bp, rv);
if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
printf("swpg_io: IO error\n");
#endif
vm_pager_unmap_pages(kva, npages);
return(rv);
}
static void
swap_pager_clean(rw)
int rw;
{
register swp_clean_t spc;
register int s, i;
vm_object_t object;
vm_page_t m;
#ifdef DEBUG
/* save panic time state */
if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
return;
if (swpagerdebug & SDB_FOLLOW)
printf("swpg_clean(%x)\n", rw);
#endif
for (;;) {
/*
* Look up and removal from inuse list must be done
* at splbio() to avoid conflicts with swap_pager_iodone.
*/
s = splbio();
for (spc = swap_pager_inuse.tqh_first;
spc != NULL;
spc = spc->spc_list.tqe_next) {
/*
* If the operation is done, remove it from the
* list and process it.
*
* XXX if we can't get the object lock we also
* leave it on the list and try again later.
* Is there something better we could do?
*/
if ((spc->spc_flags & SPC_DONE) &&
vm_object_lock_try(spc->spc_m->object)) {
TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
break;
}
}
splx(s);
/*
* No operations done, thats all we can do for now.
*/
if (spc == NULL)
break;
/*
* Found a completed operation so finish it off.
* Note: no longer at splbio since entry is off the list.
*/
m = spc->spc_m;
object = m->object;
/*
* Process each page in the cluster.
* The first page is explicitly kept in the cleaning
* entry, others must be reconstructed from the KVA.
*/
for (i = 0; i < spc->spc_npages; i++) {
if (i)
m = vm_pager_atop(spc->spc_kva + ptoa(i));
/*
* If no error mark as clean and inform the pmap
* system. If there was an error, mark as dirty
* so we will try again.
*
* XXX could get stuck doing this, should give up
* after awhile.
*/
if (spc->spc_flags & SPC_ERROR) {
printf("%s: clean of page %lx failed\n",
"swap_pager_clean", VM_PAGE_TO_PHYS(m));
m->flags |= PG_LAUNDRY;
} else {
m->flags |= PG_CLEAN;
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}
m->flags &= ~PG_BUSY;
PAGE_WAKEUP(m);
}
/*
* Done with the object, decrement the paging count
* and unlock it.
*/
if (--object->paging_in_progress == 0)
wakeup(object);
vm_object_unlock(object);
/*
* Free up KVM used and put the entry back on the list.
*/
vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
spc->spc_flags = SPC_FREE;
TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
#ifdef DEBUG
if (swpagerdebug & SDB_WRITE)
printf("swpg_clean: free spc %p\n", spc);
#endif
}
}
#ifdef DEBUG
static void
swap_pager_clean_check(mlist, npages, rw)
vm_page_t *mlist;
int npages;
int rw;
{
register swp_clean_t spc;
boolean_t bad;
int i, j, s;
vm_page_t m;
if (panicstr)
return;
bad = FALSE;
s = splbio();
for (spc = swap_pager_inuse.tqh_first;
spc != NULL;
spc = spc->spc_list.tqe_next) {
for (j = 0; j < spc->spc_npages; j++) {
m = vm_pager_atop(spc->spc_kva + ptoa(j));
for (i = 0; i < npages; i++)
if (m == mlist[i]) {
if (swpagerdebug & SDB_ANOM)
printf(
"swpg_clean_check: %s: page %p on list, flags %x\n",
rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags);
bad = TRUE;
}
}
}
splx(s);
if (bad)
panic("swpg_clean_check");
}
#endif
static void
swap_pager_iodone(bp)
register struct buf *bp;
{
register swp_clean_t spc;
daddr_t blk;
int s;
#ifdef DEBUG
/* save panic time state */
if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
return;
if (swpagerdebug & SDB_FOLLOW)
printf("swpg_iodone(%p)\n", bp);
#endif
s = splbio();
for (spc = swap_pager_inuse.tqh_first;
spc != NULL;
spc = spc->spc_list.tqe_next)
if (spc->spc_bp == bp)
break;
#ifdef DEBUG
if (spc == NULL)
panic("swap_pager_iodone: bp not found");
#endif
spc->spc_flags &= ~SPC_BUSY;
spc->spc_flags |= SPC_DONE;
if (bp->b_flags & B_ERROR)
spc->spc_flags |= SPC_ERROR;
spc->spc_bp = NULL;
blk = bp->b_blkno;
#ifdef DEBUG
--swap_pager_poip;
if (swpagerdebug & SDB_WRITE)
printf("swpg_iodone: bp=%p swp=%p flags=%x spc=%p poip=%x\n",
bp, spc->spc_swp, spc->spc_swp->sw_flags,
spc, spc->spc_swp->sw_poip);
#endif
spc->spc_swp->sw_poip--;
if (spc->spc_swp->sw_flags & SW_WANTED) {
spc->spc_swp->sw_flags &= ~SW_WANTED;
wakeup(spc->spc_swp);
}
bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
bp->b_actf = bswlist.b_actf;
bswlist.b_actf = bp;
if (bp->b_vp)
brelvp(bp);
if (bswlist.b_flags & B_WANTED) {
bswlist.b_flags &= ~B_WANTED;
wakeup(&bswlist);
}
wakeup(&vm_pages_needed);
splx(s);
}
/*
* swap_pager_remove:
*
* This is called via the vm_pager_remove path and
* will remove any pages inside the range [from, to)
* backed by us. It is assumed that both addresses
* are multiples of PAGE_SIZE. The special case
* where TO is zero means: remove to end of object.
*/
static int
swap_pager_remove(pager, from, to)
vm_pager_t pager;
vm_offset_t from, to;
{
sw_pager_t swp;
sw_blk_t swb;
int bsize, blk, bit, to_blk, to_bit, mask, cnt = 0;
#ifdef DEBUG
if (swpagerdebug & SDB_FOLLOW)
printf("swpg_remove()\n");
#endif
/* Special case stupid ranges. */
if (to > 0 && from >= to)
return(0);
swp = (sw_pager_t)pager->pg_data;
/*
* If we back no pages, just return. XXX Can this
* ever be the case? At least all remove calls should
* be through vm_object_remove_from_pager which also
* deallocates the pager when it no longer backs any
* pages. Left is the initial case: can a swap-pager
* be created without any pages put into it?
*/
if (swp->sw_cnt == 0)
return(0);
bsize = dbtob(swp->sw_bsize);
blk = from / bsize;
/* Another fast one.. no blocks in range. */
if (blk >= swp->sw_nblocks)
return(0);
bit = atop(from % bsize);
/*
* Deal with the special case with TO == 0.
* XXX Perhaps the code might be improved if we
* made to_blk & to_bit signify the inclusive end
* of range instead (i.e. to - 1).
*/
if (to) {
to_blk = to / bsize;
if (to_blk >= swp->sw_nblocks) {
to_blk = swp->sw_nblocks;
to_bit = 0;
} else
to_bit = atop(to % bsize);
} else {
to_blk = swp->sw_nblocks;
to_bit = 0;
}
/*
* Loop over the range, remove pages as we find them.
* If all pages in a block get freed, deallocate the
* swap block as well.
*/
for (swb = &swp->sw_blocks[blk], mask = (1 << bit) - 1;
blk < to_blk || (blk == to_blk && to_bit);
blk++, swb++, mask = 0) {
/* Don't bother if the block is already cleared. */
if (swb->swb_block == 0)
continue;
/*
* When coming to the end-block we need to
* adjust the mask in the othher end, as well as
* ensuring this will be the last iteration.
*/
if (blk == to_blk) {
mask |= ~((1 << to_bit) - 1);
to_bit = 0;
}
/* Count pages that will be removed. */
cnt += count_bits(swb->swb_mask & ~mask);
/*
* Remove pages by applying our mask, and if this
* means no pages are left in the block, free it.
*/
if ((swb->swb_mask &= mask) == 0) {
rmfree(swapmap, swp->sw_bsize, swb->swb_block);
swb->swb_block = 0;
}
}
/* Adjust the page count and return the removed count. */
swp->sw_cnt -= cnt;
#ifdef DEBUG
if (swp->sw_cnt < 0)
panic("swap_pager_remove: sw_cnt < 0");
#endif
return(cnt);
}
/*
* swap_pager_next:
*
* This is called via the vm_pager_next path and
* will return the offset of the next page (addresswise)
* which this pager is backing. If there are no more
* pages we will return the size of the pager's managed
* space (which by definition is larger than any page's
* offset).
*/
static vm_offset_t
swap_pager_next(pager, offset)
vm_pager_t pager;
vm_offset_t offset;
{
sw_pager_t swp;
sw_blk_t swb;
int bsize, blk, bit, to_blk, to_bit, mask;
#ifdef DEBUG
if (swpagerdebug & SDB_FOLLOW)
printf("swpg_next()\n");
#endif
swp = (sw_pager_t)pager->pg_data;
/*
* If we back no pages, just return our size. XXX Can
* this ever be the case? At least all remove calls
* should be through vm_object_remove_from_pager which
* also deallocates the pager when it no longer backs any
* pages. Left is the initial case: can a swap-pager
* be created without any pages put into it?
*/
if (swp->sw_cnt == 0)
return(swp->sw_osize);
bsize = dbtob(swp->sw_bsize);
blk = offset / bsize;
/* Another fast one.. no blocks in range. */
if (blk >= swp->sw_nblocks)
return(swp->sw_osize);
bit = atop(offset % bsize);
to_blk = swp->sw_osize / bsize;
to_bit = atop(swp->sw_osize % bsize);
/*
* Loop over the remaining blocks, returning as soon
* as we find a page.
*/
swb = &swp->sw_blocks[blk];
mask = ~((1 << bit) - 1);
for (;;) {
if (blk == to_blk) {
/* Nothing to be done in this end-block? */
if (to_bit == 0)
break;
mask &= (1 << to_bit) - 1;
}
/*
* Check this block for a backed page and return
* its offset if there.
*/
mask &= swb->swb_mask;
if (mask)
return(blk * bsize + (ffs (mask) - 1) * PAGE_SIZE);
/*
* If we handled the end of range now, this
* means we are ready.
*/
if (blk == to_blk)
break;
/* Get on with the next block. */
blk++;
swb++;
mask = ~0;
}
return swp->sw_osize;
}
/*
* swap_pager_count:
*
* Just returns the count of pages backed by this pager.
*/
int
swap_pager_count(pager)
vm_pager_t pager;
{
#ifndef notyet
return((sw_pager_t)pager->pg_data)->sw_cnt;
#else
sw_pager_t swp;
sw_blk_t swb;
int i, cnt = 0;
swp = (sw_pager_t)pager->pg_data;
if (swp->sw_blocks == NULL)
return 0;
for (i = 0; i < swp->sw_nblocks; i++)
cnt += count_bits(swp->sw_blocks[i].swb_mask);
return cnt;
#endif
}
/*
* count_bits:
*
* Counts the number of set bits in a word.
*/
static int
count_bits(x)
u_int x;
{
int cnt = 0;
while (x) {
cnt += x & 1;
x >>= 1;
}
return(cnt);
}