diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 92bac01a93c7..95365c05da72 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1,4 +1,4 @@ -/* $NetBSD: swap_pager.c,v 1.32 1997/04/17 00:08:50 thorpej Exp $ */ +/* $NetBSD: swap_pager.c,v 1.33 1997/06/12 14:51:24 mrg Exp $ */ /* * Copyright (c) 1990 University of Utah. @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -64,6 +65,7 @@ #include #include +/* XXX this makes the max swap devices 16 */ #define NSWSIZES 16 /* size of swtab */ #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ #ifndef NPENDINGIO @@ -114,10 +116,6 @@ struct swtab { #endif } swtab[NSWSIZES+1]; -int dmmin, dmmax; - -struct map *swapmap; -int nswapmap; #ifdef DEBUG int swap_pager_poip; /* pageouts in progress */ @@ -171,9 +169,8 @@ struct pagerops swappagerops = { static void swap_pager_init() { - register swp_clean_t spc; - register int i, bsize; - int maxbsize; + swp_clean_t spc; + int i, maxbsize, bsize; #ifdef DEBUG if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) @@ -202,42 +199,34 @@ swap_pager_init() spc->spc_flags = SPC_FREE; } +/* this needs to be at least ctod(1) for all ports for vtod() to work */ +#define DMMIN 32 /* - * Calculate the swap allocation constants. + * Fill in our table of object size vs. allocation size. bsize needs + * to be at least ctod(1) for all ports for vtod() to work, with a + * bare minimum of 32. */ - if (dmmin == 0) { - dmmin = DMMIN; - if (dmmin < CLBYTES/DEV_BSIZE) - dmmin = CLBYTES/DEV_BSIZE; - } - if (dmmax == 0) - dmmax = DMMAX; - - /* - * Fill in our table of object size vs. allocation size - */ - bsize = btodb(PAGE_SIZE); - if (bsize < dmmin) - bsize = dmmin; +#define max(a, b) ((a) > (b) ? 
(a) : (b)) + bsize = max(32, max(ctod(1), btodb(PAGE_SIZE))); maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); - if (maxbsize > dmmax) - maxbsize = dmmax; + if (maxbsize > NBPG) + maxbsize = NBPG; for (i = 0; i < NSWSIZES; i++) { - swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); - swtab[i].st_bsize = bsize; if (bsize <= btodb(MAXPHYS)) swap_pager_maxcluster = dbtob(bsize); + swtab[i].st_bsize = bsize; + if (bsize >= maxbsize) { + swtab[i].st_osize = 0; + break; + } + swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); #ifdef DEBUG if (swpagerdebug & SDB_INIT) printf("swpg_init: ix %d, size %lx, bsize %x\n", i, swtab[i].st_osize, swtab[i].st_bsize); #endif - if (bsize >= maxbsize) - break; bsize *= 2; } - swtab[i].st_osize = 0; - swtab[i].st_bsize = bsize; } /* @@ -273,8 +262,10 @@ swap_pager_alloc(handle, size, prot, foff) * to the object and also to remove from the * object cache. */ +#ifdef DIAGNOSTIC if (vm_object_lookup(pager) == NULL) panic("swap_pager_alloc: bad object"); +#endif return(pager); } } @@ -409,7 +400,7 @@ swap_pager_dealloc(pager) printf("swpg_dealloc: blk %x\n", bp->swb_block); #endif - rmfree(swapmap, swp->sw_bsize, bp->swb_block); + swap_free(swp->sw_bsize, bp->swb_block); } /* * Free swap management resources @@ -464,7 +455,6 @@ swap_pager_putpage(pager, mlist, npages, sync) int npages; boolean_t sync; { - int flags; #ifdef DEBUG if (swpagerdebug & SDB_FOLLOW) @@ -475,11 +465,8 @@ swap_pager_putpage(pager, mlist, npages, sync) swap_pager_clean(B_WRITE); return (VM_PAGER_OK); /* ??? */ } - flags = B_WRITE; - if (!sync) - flags |= B_ASYNC; return(swap_pager_io((sw_pager_t)pager->pg_data, - mlist, npages, flags)); + mlist, npages, B_WRITE | (sync ? 
0 : B_ASYNC))); } static boolean_t @@ -541,8 +528,10 @@ swap_pager_cluster(pager, offset, loffset, hoffset) bsize = swap_pager_maxcluster; loff = offset - (offset % bsize); +#ifdef DIAGNOSTIC if (loff >= swp->sw_osize) panic("swap_pager_cluster: bad offset"); +#endif hoff = loff + bsize; if (hoff > swp->sw_osize) @@ -656,7 +645,7 @@ swap_pager_io(swp, mlist, npages, flags) * Allocate a swap block if necessary. */ if (swb->swb_block == 0) { - swb->swb_block = rmalloc(swapmap, swp->sw_bsize); + swb->swb_block = swap_alloc(swp->sw_bsize); if (swb->swb_block == 0) { #ifdef DEBUG if (swpagerdebug & SDB_FAIL) @@ -726,8 +715,8 @@ swap_pager_io(swp, mlist, npages, flags) if ((bp->b_flags & B_READ) == 0) { bp->b_dirtyoff = 0; bp->b_dirtyend = npages * PAGE_SIZE; - swapdev_vp->v_numoutput++; s = splbio(); + swapdev_vp->v_numoutput++; swp->sw_poip++; splx(s); mask = (~(~0 << npages)) << atop(off); @@ -753,13 +742,13 @@ swap_pager_io(swp, mlist, npages, flags) * and place a "cleaning" entry on the inuse queue. */ if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { -#ifdef DEBUG +#ifdef DIAGNOSTIC if (swap_pager_free.tqh_first == NULL) panic("swpg_io: lost spc"); #endif spc = swap_pager_free.tqh_first; TAILQ_REMOVE(&swap_pager_free, spc, spc_list); -#ifdef DEBUG +#ifdef DIAGNOSTIC if (spc->spc_flags != SPC_FREE) panic("swpg_io: bad free spc"); #endif @@ -1004,7 +993,7 @@ swap_pager_iodone(bp) spc = spc->spc_list.tqe_next) if (spc->spc_bp == bp) break; -#ifdef DEBUG +#ifdef DIAGNOSTIC if (spc == NULL) panic("swap_pager_iodone: bp not found"); #endif @@ -1140,14 +1129,14 @@ swap_pager_remove(pager, from, to) * means no pages are left in the block, free it. */ if ((swb->swb_mask &= mask) == 0) { - rmfree(swapmap, swp->sw_bsize, swb->swb_block); + swap_free(swp->sw_bsize, swb->swb_block); swb->swb_block = 0; } } /* Adjust the page count and return the removed count. 
*/ swp->sw_cnt -= cnt; -#ifdef DEBUG +#ifdef DIAGNOSTIC if (swp->sw_cnt < 0) panic("swap_pager_remove: sw_cnt < 0"); #endif diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index 97cb79b106e9..ef89b17eef50 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -1,8 +1,8 @@ -/* $NetBSD: vm_swap.c,v 1.37 1997/04/17 00:12:50 thorpej Exp $ */ +/* $NetBSD: vm_swap.c,v 1.38 1997/06/12 14:51:20 mrg Exp $ */ /* - * Copyright (c) 1982, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 1995, 1996, 1997 Matthew R. Green + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -14,25 +14,22 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. + * This product includes software developed by Matthew R. Green for + * The NetBSD Foundation. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 */ #include @@ -40,218 +37,759 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include - +#include +#include +#include #include #include -#include +#include + #include #include /* - * Indirect driver for multi-controller paging. + * The idea here is to provide a single interface for multiple swap devices, + * of any kind and priority in a simple and fast way. + * + * Each swap device has these properties: + * * swap in use. + * * swap enabled. + * * map information in `/dev/drum'. + * * vnode pointer. + * Files have these additional properties: + * * block size. + * * maximum byte count in buffer. + * * buffer. + * * credentials. 
+ * + * The arguments to swapctl(2) are: + * int cmd; + * void *arg; + * int misc; + * The cmd can be one of: + * SWAP_NSWAP - swapctl(2) returns the number of swap devices currently in + * use. + * SWAP_STATS - swapctl(2) takes a struct swapent * in (void *arg) and writes + * misc or fewer (to zero) entries of configured swap devices, + * and returns the number of entries written or -1 on error. + * SWAP_ON - swapctl(2) takes a (char *) in arg to be the pathname of a + * device or file to begin swapping on, with its priority in + * misc, returning 0 on success and -1 on error. + * SWAP_OFF - swapctl(2) takes a (char *) in arg to be the pathname of a + * device or file to stop swapping on, returning 0 or -1. + * XXX unwritten. + * SWAP_CTL - swapctl(2) changes the priority of a swap device, using the + * misc value. */ -int nswap, nswdev; -#ifdef SEQSWAP -int niswdev; /* number of interleaved swap devices */ -int niswap; /* size of interleaved swap area */ +#ifdef SWAPDEBUG +#define VMSDB_SWON 0x0001 +#define VMSDB_SWOFF 0x0002 +#define VMSDB_SWINIT 0x0004 +#define VMSDB_SWALLOC 0x0008 +#define VMSDB_SWFLOW 0x0010 +#define VMSDB_INFO 0x0020 +int vmswapdebug = 0; #endif -extern int dmmax; -int swfree __P((struct proc *, int)); +#define SWAP_TO_FILES + +struct swapdev { + struct swapent swd_se; +#define swd_dev swd_se.se_dev +#define swd_flags swd_se.se_flags +#define swd_nblks swd_se.se_nblks +#define swd_inuse swd_se.se_inuse +#define swd_priority swd_se.se_priority + daddr_t swd_mapoffset; + int swd_mapsize; + struct extent *swd_ex; + struct vnode *swd_vp; + CIRCLEQ_ENTRY(swapdev) swd_next; + +#ifdef SWAP_TO_FILES + int swd_bsize; + int swd_maxactive; + struct buf swd_tab; + struct ucred *swd_cred; +#endif +}; + +struct swappri { + int spi_priority; + CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev; + LIST_ENTRY(swappri) spi_swappri; +}; + /* - * Set up swap devices. - * Initialize linked list of free swap - * headers. 
These do not actually point - * to buffers, but rather to pages that - * are being swapped in and out. + * The following two structures are used to keep track of data transfers + * on swap devices associated with regular files. + * NOTE: this code is more or less a copy of vnd.c; we use the same + * structure names here to ease porting. + */ +struct vndxfer { + struct buf *vx_bp; /* Pointer to parent buffer */ + struct swapdev *vx_sdp; + int vx_error; + int vx_pending; /* # of pending aux buffers */ +}; + +struct vndbuf { + struct buf vb_buf; + struct vndxfer *vb_xfer; +}; + +/* + * XXX: Not a very good idea in a swap strategy module! + */ +#define getvndxfer() \ + ((struct vndxfer *)malloc(sizeof(struct vndxfer), M_DEVBUF, M_WAITOK)) + +#define putvndxfer(vnx) \ + free((caddr_t)(vnx), M_DEVBUF) + +#define getvndbuf() \ + ((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK)) + +#define putvndbuf(vbp) \ + free((caddr_t)(vbp), M_DEVBUF) + +int nswapdev, nswap; +int swflags; +struct extent *swapmap; +LIST_HEAD(swap_priority, swappri) swap_priority; + +static int swap_on __P((struct proc *, struct swapdev *)); +#ifdef SWAP_OFF_WORKS +static int swap_off __P((struct proc *, struct swapdev *)); +#endif +static struct swapdev *swap_getsdpfromaddr __P((daddr_t)); +static void swap_addmap __P((struct swapdev *, int)); + +#ifdef SWAP_TO_FILES +static void sw_reg_strategy __P((struct swapdev *, struct buf *, int)); +static void sw_reg_iodone __P((struct buf *)); +static void sw_reg_start __P((struct swapdev *)); +#endif + +static void insert_swapdev __P((struct swapdev *, int)); +static struct swapdev *find_swapdev __P((struct vnode *, int)); +static void swaplist_trim __P((void)); + +/* XXX - Replace with general locking device when available; a woken sleeper must re-test SWP_LOCKED before taking the lock. */ +static void _swaplist_lock __P((void)); +static void _swaplist_unlock __P((void)); +int swaplock = 0; +#define SWP_LOCKED 1 +#define SWP_WANT 2 + +static __inline void +_swaplist_lock() +{ + while (swaplock & 
SWP_LOCKED) { + swaplock |= SWP_WANT; + tsleep((caddr_t)&swaplock, PSWP, "swaplock", 0); + } + swaplock |= SWP_LOCKED; +} + +static __inline void +_swaplist_unlock() +{ + swaplock &= ~SWP_LOCKED; + if (swaplock & SWP_WANT) { + swaplock &= ~SWP_WANT; + wakeup((caddr_t)&swaplock); + } +} + +/* + * Insert a swap device on the priority list (kept in descending spi_priority order). */ void -swapinit() +insert_swapdev(sdp, priority) + struct swapdev *sdp; + int priority; { - register int i; - register struct buf *sp = swbuf; - register struct proc *p = &proc0; /* XXX */ - struct swdevt *swp; - int error; + struct swappri *spp, *pspp; + +again: + _swaplist_lock(); + pspp = NULL; /* no entry precedes the insertion point yet */ + + for (spp = swap_priority.lh_first; spp != NULL; spp = spp->spi_swappri.le_next) { + if (spp->spi_priority <= priority) + break; + pspp = spp; + } + + if (spp == NULL || spp->spi_priority != priority) { + spp = (struct swappri *) + malloc(sizeof *spp, M_VMSWAP, M_NOWAIT); + + if (spp == NULL) { + _swaplist_unlock(); + tsleep((caddr_t)&lbolt, PSWP, "memory", 0); + goto again; + } +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("sw: had to create a new swappri = %d\n", + priority); +#endif /* SWAPDEBUG */ + + spp->spi_priority = priority; + CIRCLEQ_INIT(&spp->spi_swapdev); + + if (pspp) + LIST_INSERT_AFTER(pspp, spp, spi_swappri); + else + LIST_INSERT_HEAD(&swap_priority, spp, + spi_swappri); + + } + /* Onto priority list */ + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_priority = priority; + _swaplist_unlock(); +} + +/* + * Find and optionally remove a swap device from the priority list. 
+ */ +struct swapdev * +find_swapdev(vp, remove) + struct vnode *vp; + int remove; +{ + struct swapdev *sdp; + struct swappri *spp; + + _swaplist_lock(); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (sdp->swd_vp == vp) { + if (remove) + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, + swd_next); + _swaplist_unlock(); + return(sdp); + } + } + _swaplist_unlock(); + return (NULL); +} + +/* + * Scan priority list for empty priority entries. + */ +void +swaplist_trim() +{ + struct swappri *spp; + + _swaplist_lock(); +restart: + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev) + continue; + LIST_REMOVE(spp, spi_swappri); + free((caddr_t)spp, M_VMSWAP); + goto restart; + } + _swaplist_unlock(); +} + +int +sys_swapctl(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_swapctl_args /* { + syscallarg(int) cmd; + syscallarg(void *) arg; + syscallarg(int) misc; + } */ *uap = (struct sys_swapctl_args *)v; + struct vnode *vp; + struct nameidata nd; + struct swappri *spp; + struct swapdev *sdp; + struct swapent *sep; + int count, error, misc; + int priority; + + misc = SCARG(uap, misc); + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("entering sys_swapctl\n"); +#endif /* SWAPDEBUG */ + + /* how many swap devices */ + if (SCARG(uap, cmd) == SWAP_NSWAP) { +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("did SWAP_NSWAP: leaving sys_swapctl\n"); +#endif /* SWAPDEBUG */ + *retval = nswapdev; + return (0); + } + + /* stats on the swap devices. 
*/ + if (SCARG(uap, cmd) == SWAP_STATS) { + sep = (struct swapent *)SCARG(uap, arg); + count = 0; + + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev && misc-- > 0; + sdp = sdp->swd_next.cqe_next) { + error = copyout((caddr_t)&sdp->swd_se, + (caddr_t)sep, sizeof(struct swapent)); + if (error) + return (error); + count++; + sep++; + } + } +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("sw: did SWAP_STATS: leaving sys_swapctl\n"); +#endif /* SWAPDEBUG */ + *retval = count; + return (0); + } + if ((error = suser(p->p_ucred, &p->p_acflag))) + return (error); + + if (SCARG(uap, arg) == NULL) { + /* XXX - interface - arg==NULL: miniroot */ + vp = rootvp; + if (vget(vp, 1)) + return (EBUSY); + } else { + NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_USERSPACE, + SCARG(uap, arg), p); + if ((error = namei(&nd))) + return (error); + + vp = nd.ni_vp; + } + + switch(SCARG(uap, cmd)) { + case SWAP_CTL: + priority = SCARG(uap, misc); + if ((sdp = find_swapdev(vp, 1)) == NULL) { + error = ENOENT; + break; + } + insert_swapdev(sdp, priority); + swaplist_trim(); + break; + + case SWAP_ON: + priority = SCARG(uap, misc); + + /* Check for duplicates */ + if ((sdp = find_swapdev(vp, 0)) != NULL) { + error = EBUSY; + goto bad; + } + + sdp = (struct swapdev *) + malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + sdp->swd_inuse = sdp->swd_flags = 0; + sdp->swd_vp = vp; + sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; + + if ((error = swap_on(p, sdp)) != 0) { + free((caddr_t)sdp, M_VMSWAP); + break; + } +#ifdef SWAP_TO_FILES + /* + * XXX Is NFS elaboration necessary? 
+ */ + if (vp->v_type == VREG) + sdp->swd_cred = crdup(p->p_ucred); +#endif + insert_swapdev(sdp, priority); + + /* Keep reference to vnode */ + vref(vp); + break; + + case SWAP_OFF: +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("doing SWAP_OFF...\n"); +#endif /* SWAPDEBUG */ +#ifdef SWAP_OFF_WORKS + if ((sdp = find_swapdev(vp, 0)) == NULL) { + error = ENXIO; + break; + } + /* + * If a device isn't in use or enabled, we + * can't stop swapping from it (again). + */ + if ((sdp->swd_flags & + (SWF_INUSE|SWF_ENABLE)) == 0) { + error = EBUSY; + goto bad; + } + if ((error = swap_off(p, sdp)) != 0) + goto bad; + + /* Find again and remove this time */ + if ((sdp = find_swapdev(vp, 1)) == NULL) { + error = ENXIO; + break; + } + free((caddr_t)sdp, M_VMSWAP); +#else +#ifdef DIAGNOSTIC + printf("swap SWAP_OFF attempted\n"); +#endif +#endif + break; + + default: +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("doing default...\n"); +#endif /* SWAPDEBUG */ + error = EINVAL; + } + +bad: + vput(vp); + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("leaving sys_swapctl: error %d\n", error); +#endif /* SWAPDEBUG */ + return (error); +} + +/* + * swap_on() attempts to begin swapping on a swapdev. we check that this + * device is OK to swap from, miss the start of any disk (to avoid any + * disk labels that may exist). + */ +static int +swap_on(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + static int count = 0; + struct vnode *vp = sdp->swd_vp; + int error, nblks, size; + long addr; + char *storage; + int storagesize; +#ifdef SWAP_TO_FILES + struct vattr va; +#endif +#ifdef NFS + extern int (**nfsv2_vnodeop_p) __P((void *)); +#endif /* NFS */ + dev_t dev = sdp->swd_dev; + char *name; + + /* If root on swap, then the skip open/close operations. 
*/ + if (vp != rootvp) { + if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) + return (error); + } + +#ifdef SWAPDEBUG /* this wants only for block devices */ + if (vmswapdebug & VMSDB_INFO) + printf("swap_on: dev = %d, major(dev) = %d\n", dev, major(dev)); +#endif /* SWAPDEBUG */ + + switch (vp->v_type) { + case VBLK: + if (bdevsw[major(dev)].d_psize == 0 || + (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { + error = ENXIO; + goto bad; + } + break; + +#ifdef SWAP_TO_FILES + case VREG: + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) + goto bad; + nblks = (int)btodb(va.va_size); + if ((error = + VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0) + goto bad; + + sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize; +#ifdef NFS + if (vp->v_op == nfsv2_vnodeop_p) + sdp->swd_maxactive = 2; /* XXX */ + else +#endif /* NFS */ + sdp->swd_maxactive = 8; /* XXX */ + break; +#endif + + default: + error = ENXIO; + goto bad; + } + if (nblks == 0) { +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("swap_on: nblks == 0\n"); +#endif /* SWAPDEBUG */ + error = EINVAL; + goto bad; + } + + sdp->swd_flags |= SWF_INUSE; + sdp->swd_nblks = nblks; /* - * Count swap devices, and adjust total swap space available. - * Some of the space will not be countable until later (dynamically - * configurable devices) and some of the counted space will not be - * available until a swapon() system call is issued, both usually - * happen when the system goes multi-user. - * - * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX + * skip over first cluster of a device in case of labels or + * boot blocks. 
*/ -#ifdef SEQSWAP - nswdev = niswdev = 0; - nswap = niswap = 0; - /* - * All interleaved devices must come first - */ - for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) { - if (swp->sw_flags & SW_SEQUENTIAL) - break; - niswdev++; - if (swp->sw_nblks > niswap) - niswap = swp->sw_nblks; + if (vp->v_type == VBLK) { + size = (int)(nblks - ctod(CLSIZE)); + addr = (long)ctod(CLSIZE); + } else { + size = (int)nblks; + addr = (long)0; } - niswap = roundup(niswap, dmmax); - niswap *= niswdev; - if (niswdev != 0 && swdevt[0].sw_vp == NULL && - bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp)) - panic("swapvp"); + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWON) + printf("swap_on: dev %x: size %d, addr %ld\n", dev, size, addr); +#endif /* SWAPDEBUG */ + + name = malloc(12, M_VMSWAP, M_WAITOK); + sprintf(name, "swap0x%04x", count++); + /* XXX make this based on ram as well. */ + storagesize = EXTENT_FIXED_STORAGE_SIZE(maxproc * 2); + storage = malloc(storagesize, M_VMSWAP, M_WAITOK); + sdp->swd_ex = extent_create(name, addr, addr + size, M_VMSWAP, + storage, storagesize, + EX_NOCOALESCE|EX_WAITOK); + + if (vp == rootvp) { + struct mount *mp; + struct statfs *sp; + int rootblks; + + /* Get size from root FS (mountroot did statfs) */ + mp = rootvnode->v_mount; + sp = &mp->mnt_stat; + rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); + if (rootblks > nblks) + panic("miniroot size"); + + if (extent_alloc_region(sdp->swd_ex, addr, rootblks, EX_WAITOK)) + panic("miniroot region"); + + printf("Preserved %d blocks of miniroot leaving %d pages of swap + \n", + rootblks, dtoc(size - rootblks)); + } + + swap_addmap(sdp, size); + nswapdev++; + nswap += nblks; + sdp->swd_flags |= SWF_ENABLE; + if (dumpdev == NULL && vp->v_type == VBLK) + dumpdev = dev; + + return (0); + +bad: + if (vp != rootvp) + (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); + return (error); +} + +#ifdef SWAP_OFF_WORKS +static int +swap_off(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + /* 
turn off the enable flag */ + sdp->swd_flags &= ~SWF_ENABLE; + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWOFF) + printf("swap_off: %x\n", sdp->swd_dev); +#endif /* SWAPDEBUG */ + /* - * The remainder must be sequential + * XXX write me + * + * the idea is to find out which processes are using this swap + * device, and page them all in. + * + * eventually, we should try to move them out to other swap areas + * if available. */ - for ( ; swp->sw_dev != NODEV; swp++) { - if ((swp->sw_flags & SW_SEQUENTIAL) == 0) - panic("binit: mis-ordered swap devices"); - nswdev++; - if (swp->sw_nblks > 0) { - if (swp->sw_nblks % dmmax) - swp->sw_nblks -= (swp->sw_nblks % dmmax); - nswap += swp->sw_nblks; + + /* until the above code is written, we must ENODEV */ + return ENODEV; + + free(sdp->swd_ex->ex_name, M_VMSWAP); + extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK); + nswap -= sdp->swd_nblks; + nswapdev--; + extent_destroy(sdp->swd_ex); + free((caddr_t)sdp->swd_ex, M_VMSWAP); + if (sdp->swd_vp != rootvp) + (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); + if (sdp->swd_vp) + vrele(sdp->swd_vp); + free((caddr_t)sdp, M_VMSWAP); + return (0); +} +#endif + +/* + * to decide where to allocate what part of swap, we must "round robin" + * the swap devices in swap_priority of the same priority until they are + * full. we do this with a list of swap priorities that have circle + * queues of swapdevs. + * + * the following functions control allocation and freeing of part of the + * swap area. you call swap_alloc() with a size and it returns an address. + * later you call swap_free() and it frees the use of that swap area. 
+ * + * daddr_t swap_alloc(int size); + * void swap_free(int size, daddr_t addr); + */ + +daddr_t +swap_alloc(size) + int size; +{ + struct swapdev *sdp; + struct swappri *spp; + u_long result; + + if (nswapdev < 1) + return 0; + + _swaplist_lock(); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) { + /* if it's not enabled, then we can't swap from it */ + if ((sdp->swd_flags & SWF_ENABLE) == 0 || + /* XXX IS THIS CORRECT ? */ +#if 1 + (sdp->swd_inuse + size > sdp->swd_nblks) || +#endif + extent_alloc(sdp->swd_ex, size, EX_NOALIGN, + EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT, + &result) != 0) { + continue; + } + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_inuse += size; + _swaplist_unlock(); + return (daddr_t)(result + sdp->swd_mapoffset); } } - nswdev += niswdev; - nswap += niswap; -#else - nswdev = 0; - nswap = 0; - for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) { - nswdev++; - if (swp->sw_nblks > nswap) - nswap = swp->sw_nblks; - } - if (nswdev > 1) - nswap = ((nswap + dmmax - 1) / dmmax) * dmmax; - nswap *= nswdev; - if (nswdev != 0 && swdevt[0].sw_vp == NULL && - bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp)) - panic("swapvp"); -#endif - if (nswap == 0) - printf("WARNING: no swap space found\n"); - else if ((error = swfree(p, 0)) == ENXIO) - printf("WARNING: primary swap device not configured\n"); - else if (error) { - printf("swfree errno %d\n", error); /* XXX */ - panic("swapinit swfree 0"); - } - - /* - * Now set up swap buffer headers. 
- */ - bswlist.b_actf = sp; - for (i = 0; i < nswbuf - 1; i++, sp++) { - sp->b_actf = sp + 1; - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; - } - sp->b_rcred = sp->b_wcred = p->p_ucred; - sp->b_vnbufs.le_next = NOLIST; - sp->b_actf = NULL; + _swaplist_unlock(); + return 0; } void -swstrategy(bp) - register struct buf *bp; +swap_free(size, addr) + int size; + daddr_t addr; { - int sz, off, seg, index; - register struct swdevt *sp; - struct vnode *vp; + struct swapdev *sdp = swap_getsdpfromaddr(addr); - sz = howmany(bp->b_bcount, DEV_BSIZE); - if (bp->b_blkno + sz > nswap) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - if (nswdev > 1) { -#ifdef SEQSWAP - if (bp->b_blkno < niswap) { - if (niswdev > 1) { - off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - seg = bp->b_blkno / dmmax; - index = seg % niswdev; - seg /= niswdev; - bp->b_blkno = seg*dmmax + off; - } else - index = 0; - } else { - register struct swdevt *swp; - - bp->b_blkno -= niswap; - for (index = niswdev, swp = &swdevt[niswdev]; - swp->sw_dev != NODEV; - swp++, index++) { - if (bp->b_blkno < swp->sw_nblks) - break; - bp->b_blkno -= swp->sw_nblks; - } - if (swp->sw_dev == NODEV || - bp->b_blkno+sz > swp->sw_nblks) { - bp->b_error = swp->sw_dev == NODEV ? 
- ENODEV : EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - } -#else - off = bp->b_blkno % dmmax; - if (off+sz > dmmax) { - bp->b_error = EINVAL; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - seg = bp->b_blkno / dmmax; - index = seg % nswdev; - seg /= nswdev; - bp->b_blkno = seg*dmmax + off; +#ifdef DIAGNOSTIC + if (sdp == NULL) + panic("swap_free: unmapped address\n"); + if (nswapdev < 1) + panic("swap_free: nswapdev < 1\n"); #endif - } else - index = 0; - sp = &swdevt[index]; - if (sp->sw_vp == NULL) { - bp->b_error = ENODEV; - bp->b_flags |= B_ERROR; - biodone(bp); - return; - } - if ((bp->b_dev = sp->sw_dev) == NODEV && sp->sw_vp->v_type != VREG) - panic("swstrategy"); - VHOLD(sp->sw_vp); - if ((bp->b_flags & B_READ) == 0) { - if ((vp = bp->b_vp) != NULL) { - vp->v_numoutput--; - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); + extent_free(sdp->swd_ex, addr - sdp->swd_mapoffset, size, + EX_MALLOCOK|EX_NOWAIT); + sdp->swd_inuse -= size; +#ifdef DIAGNOSTIC + if (sdp->swd_inuse < 0) + panic("swap_free: inuse < 0"); +#endif +} + +/* + * We have a physical -> virtual mapping to address here. There are several + * different physical address spaces (one for each swap partition) that are + * to be mapped onto a single virtual address space. 
+ */ +#define ADDR_IN_MAP(addr, sdp) \ + (((addr) >= (sdp)->swd_mapoffset) && \ + ((addr) < ((sdp)->swd_mapoffset + (sdp)->swd_mapsize))) + +struct swapdev * +swap_getsdpfromaddr(addr) + daddr_t addr; +{ + struct swapdev *sdp; + struct swappri *spp; + + _swaplist_lock(); + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (ADDR_IN_MAP(addr, sdp)) { + _swaplist_unlock(); + return sdp; } - } - sp->sw_vp->v_numoutput++; - } - if (bp->b_vp != NULL) - brelvp(bp); - bp->b_vp = sp->sw_vp; - VOP_STRATEGY(bp); + _swaplist_unlock(); + return NULL; +} + +void +swap_addmap(sdp, size) + struct swapdev *sdp; + int size; +{ + u_long result; + + if (extent_alloc(swapmap, size, EX_NOALIGN, EX_NOBOUNDARY, + EX_WAITOK, &result)) + panic("swap_addmap"); + + sdp->swd_mapoffset = result; + sdp->swd_mapsize = size; } /*ARGSUSED*/ @@ -276,211 +814,327 @@ swwrite(dev, uio, ioflag) return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio)); } -/* - * System call swapon(name) enables swapping on device name, - * which must be in the swdevsw. Return EBUSY - * if already swapping on this device. 
- */ -/* ARGSUSED */ -int -sys_swapon(p, v, retval) - struct proc *p; - void *v; - register_t *retval; +/* + * Strategy routine for swap I/O. bp->b_blkno is looked up with + * swap_getsdpfromaddr; if no swap device covers it the buffer is + * completed with EINVAL and B_ERROR. Otherwise the block number is + * reduced by swd_mapoffset and the request is dispatched on the type of + * sdp->swd_vp: VBLK buffers are pointed at that vnode and passed to + * VOP_STRATEGY, VREG (only under SWAP_TO_FILES) goes to + * sw_reg_strategy, and any other type panics. + */ +void +swstrategy(bp) + struct buf *bp; { - struct sys_swapon_args /* { - syscallarg(char *) name; - } */ *uap = v; - register struct vnode *vp; - register struct swdevt *sp; - dev_t dev; - int error; - struct nameidata nd; - - if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) - return (error); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, name), p); - if ((error = namei(&nd)) != 0) - return (error); - vp = nd.ni_vp; - if (vp->v_type != VBLK) { - vrele(vp); - return (ENOTBLK); - } - dev = (dev_t)vp->v_rdev; - if (major(dev) >= nblkdev) { - vrele(vp); - return (ENXIO); - } - for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) { - if (sp->sw_dev == dev) { - if (sp->sw_flags & SW_FREED) { - vrele(vp); - return (EBUSY); - } - sp->sw_vp = vp; - if ((error = swfree(p, sp - swdevt)) != 0) { - vrele(vp); - return (error); - } - return (0); - } -#ifdef SEQSWAP - /* - * If we have reached a non-freed sequential device without - * finding what we are looking for, it is an error. - * That is because all interleaved devices must come first - * and sequential devices must be freed in order. - */ - if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL) - break; -#endif - } - vrele(vp); - return (EINVAL); -} - -/* - * Swfree(index) frees the index'th portion of the swap map. - * Each of the nswdev devices provides 1/nswdev'th of the swap - * space, which is laid out with blocks of dmmax pages circularly - * among the devices. - */ -int -swfree(p, index) - struct proc *p; - int index; -{ - register struct swdevt *sp; - register swblk_t vsbase; - register long blk; + struct swapdev *sdp; struct vnode *vp; - register swblk_t dvbase; - register int nblks; - int error; + int bn; - sp = &swdevt[index]; - vp = sp->sw_vp; - /* If root on swap, then the skip open/close operations. 
*/ - if (vp != rootvp) { - if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)) != 0) - return (error); + bn = bp->b_blkno; + sdp = swap_getsdpfromaddr(bn); + if (sdp == NULL) { + bp->b_error = EINVAL; + bp->b_flags |= B_ERROR; + biodone(bp); + return; } - sp->sw_flags |= SW_FREED; - nblks = sp->sw_nblks; - /* - * Some devices may not exist til after boot time. - * If so, their nblk count will be 0. - */ - if (nblks <= 0) { - int perdev; - dev_t dev = sp->sw_dev; - if (bdevsw[major(dev)].d_psize == 0 || - (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { - if (vp != rootvp) - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - sp->sw_flags &= ~SW_FREED; - return (ENXIO); + /* make bn relative to the start of the range this device occupies in the swap map */ + bn -= sdp->swd_mapoffset; + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("swstrategy(%s): mapoff %x, bn %x, bcount %ld\n", + ((bp->b_flags & B_READ) == 0) ? "write" : "read", + sdp->swd_mapoffset, bn, bp->b_bcount); +#endif + + switch (sdp->swd_vp->v_type) { + default: + panic("swstrategy: vnode type %x", sdp->swd_vp->v_type); + case VBLK: + bp->b_blkno = bn; + vp = sdp->swd_vp; + bp->b_dev = sdp->swd_dev; + VHOLD(vp); + if ((bp->b_flags & B_READ) == 0) { + int s = splbio(); + vwakeup(bp); + vp->v_numoutput++; + splx(s); } -#ifdef SEQSWAP - if (index < niswdev) { - perdev = niswap / niswdev; - if (nblks > perdev) - nblks = perdev; - } else { - if (nblks % dmmax) - nblks -= (nblks % dmmax); - nswap += nblks; - } -#else - perdev = nswap / nswdev; - if (nblks > perdev) - nblks = perdev; -#endif - sp->sw_nblks = nblks; - } - if (nblks == 0) { - if (vp != rootvp) - (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); - sp->sw_flags &= ~SW_FREED; - return (0); /* XXX error? 
*/ - } -#ifdef SEQSWAP - if (sp->sw_flags & SW_SEQUENTIAL) { - register struct swdevt *swp; - blk = niswap; - for (swp = &swdevt[niswdev]; swp != sp; swp++) - blk += swp->sw_nblks; - rmfree(swapmap, nblks, blk); - return (0); - } -#endif - for (dvbase = 0; dvbase < nblks; dvbase += dmmax) { - blk = nblks - dvbase; -#ifdef SEQSWAP - if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap) - panic("swfree"); -#else - if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap) - panic("swfree"); -#endif - if (blk > dmmax) - blk = dmmax; - if (vsbase == 0) { - /* - * First of all chunks... initialize the swapmap. - * Don't use the first cluster of the device - * in case it starts with a label or boot block. - */ - rminit(swapmap, blk - ctod(CLSIZE), - vsbase + ctod(CLSIZE), "swap", nswapmap); - } else if (dvbase == 0) { - /* - * Don't use the first cluster of the device - * in case it starts with a label or boot block. - */ - rmfree(swapmap, blk - ctod(CLSIZE), - vsbase + ctod(CLSIZE)); - } else - rmfree(swapmap, blk, vsbase); - } + if (bp->b_vp != NULL) + brelvp(bp); - /* - * Preserve the mini-root if appropriate: - * Note: this requires !SEQSWAP && nswdev==1 - * - * A mini-root gets copied into the front of the swap - * and we run over top of the swap area just long - * enough for us to do a mkfs and restor of the real - * root (sure beats rewriting standalone restor). - */ - if (vp == rootvp) { -#ifndef MINIROOTSIZE - struct mount *mp; - struct statfs *sp; + bp->b_vp = vp; + VOP_STRATEGY(bp); + return; +#ifdef SWAP_TO_FILES + case VREG: + sw_reg_strategy(sdp, bp, bn); + return; #endif - long firstblk; - int rootblks; - -#ifdef MINIROOTSIZE - rootblks = MINIROOTSIZE; -#else - /* Get size from root FS (mountroot did statfs) */ - mp = rootvnode->v_mount; - sp = &mp->mnt_stat; - rootblks = sp->f_blocks * (sp->f_bsize / DEV_BSIZE); -#endif - if (rootblks > nblks) - panic("swfree miniroot size"); - /* First ctod(CLSIZE) blocks are not in the map. 
*/ - firstblk = rmalloc(swapmap, rootblks - ctod(CLSIZE)); - if (firstblk != ctod(CLSIZE)) - panic("swfree miniroot save"); - printf("Preserved %d blocks of miniroot leaving %d pages of swap\n", - rootblks, dtoc(nblks - rootblks)); } - - return (0); + /* NOTREACHED */ +} + +#ifdef SWAP_TO_FILES +int doswvnlock = 0; + +/* + * Carry out the transfer described by bp against sdp->swd_vp, starting + * at device block bn. The request is cut into chunks; for each one + * VOP_BMAP supplies the target vnode and block, a vndbuf is filled in + * with sw_reg_iodone as its B_CALL completion routine, and the vndbuf is + * queued on sdp->swd_tab with disksort. sw_reg_start is kicked while + * swd_tab.b_active is below swd_maxactive. One vndxfer (vnx) ties the + * chunks to bp and counts those still pending. + * + * NOTE(review): bp and vnx are still used by later loop iterations, yet + * sw_reg_iodone may biodone bp and free vnx as soon as an error is + * recorded and vx_pending reaches 0 -- confirm this cannot occur while + * the loop is still issuing chunks. + */ +static void +sw_reg_strategy(sdp, bp, bn) + struct swapdev *sdp; + struct buf *bp; + int bn; +{ + struct vnode *vp; + struct vndbuf *nbp; + struct vndxfer *vnx; + daddr_t nbn; + caddr_t addr; + int s, off, nra, error, sz, resid; + + /* + * Translate the device logical block numbers into physical + * block numbers of the underlying filesystem device. + */ + bp->b_resid = bp->b_bcount; + addr = bp->b_data; + bn = dbtob(bn); + + /* Allocate a header for this transfer and link it to the buffer */ + vnx = getvndxfer(); + vnx->vx_error = 0; + vnx->vx_pending = 0; + vnx->vx_bp = bp; + vnx->vx_sdp = sdp; + + for (resid = bp->b_resid; resid; resid -= sz) { + if (doswvnlock) VOP_LOCK(sdp->swd_vp); + nra = 0; + error = VOP_BMAP(sdp->swd_vp, bn / sdp->swd_bsize, + &vp, &nbn, &nra); + if (doswvnlock) VOP_UNLOCK(sdp->swd_vp); + + if (error == 0 && (long)nbn == -1) + error = EIO; + + /* + * If there was an error or a hole in the file...punt. + * Note that we may have to wait for any operations + * that we have already fired off before releasing + * the buffer. + * + * XXX we could deal with holes here but it would be + * a hassle (in the write case). 
+ */ + if (error) { + vnx->vx_error = error; + s = splbio(); + if (vnx->vx_pending == 0) { + bp->b_error = error; + bp->b_flags |= B_ERROR; + putvndxfer(vnx); + biodone(bp); + } + splx(s); + return; + } + + /* + * A chunk that starts off a swd_bsize boundary runs only to the + * next boundary; an aligned chunk spans 1 + nra blocks, with nra + * as filled in by VOP_BMAP. Either way it is capped at resid. + */ + if ((off = bn % sdp->swd_bsize) != 0) + sz = sdp->swd_bsize - off; + else + sz = (1 + nra) * sdp->swd_bsize; + + if (resid < sz) + sz = resid; + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("sw_reg_strategy: vp %p/%p bn 0x%x/0x%x" + " sz 0x%x\n", sdp->swd_vp, vp, bn, nbn, sz); +#endif /* SWAPDEBUG */ + + nbp = getvndbuf(); + nbp->vb_buf.b_flags = bp->b_flags | B_CALL; + nbp->vb_buf.b_bcount = sz; + nbp->vb_buf.b_bufsize = bp->b_bufsize; + nbp->vb_buf.b_error = 0; + nbp->vb_buf.b_dev = vp->v_type == VREG + ? NODEV : vp->v_rdev; + nbp->vb_buf.b_data = addr; + nbp->vb_buf.b_blkno = nbn + btodb(off); + nbp->vb_buf.b_proc = bp->b_proc; + nbp->vb_buf.b_iodone = sw_reg_iodone; + nbp->vb_buf.b_vp = NULLVP; + nbp->vb_buf.b_rcred = sdp->swd_cred; + nbp->vb_buf.b_wcred = sdp->swd_cred; + if (bp->b_dirtyend == 0) { + nbp->vb_buf.b_dirtyoff = 0; + nbp->vb_buf.b_dirtyend = sz; + } else { + nbp->vb_buf.b_dirtyoff = + max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_dirtyend = + min(sz, + max(0, bp->b_dirtyend - (bp->b_bcount-resid))); + } + if (bp->b_validend == 0) { + nbp->vb_buf.b_validoff = 0; + nbp->vb_buf.b_validend = sz; + } else { + nbp->vb_buf.b_validoff = + max(0, bp->b_validoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_validend = + min(sz, + max(0, bp->b_validend - (bp->b_bcount-resid))); + } + + nbp->vb_xfer = vnx; + + /* + * Just sort by block number + */ + nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; + s = splbio(); + vnx->vx_pending++; + bgetvp(vp, &nbp->vb_buf); + disksort(&sdp->swd_tab, &nbp->vb_buf); + if (sdp->swd_tab.b_active < sdp->swd_maxactive) { + sdp->swd_tab.b_active++; + sw_reg_start(sdp); + } + splx(s); + + bn += sz; + addr += sz; + } +} + +/* + * Feed requests sequentially. 
+ * We do it this way to keep from flooding NFS servers if we are connected + * to an NFS file. This places the burden on the client rather than the + * server. + */ +static void +sw_reg_start(sdp) + struct swapdev *sdp; +{ + struct buf *bp; + + /* + * Take the buffer at the head of the device queue. + * NOTE(review): bp is dereferenced without a NULL check, so this + * relies on callers invoking it only with swd_tab.b_actf set -- confirm. + */ + bp = sdp->swd_tab.b_actf; + sdp->swd_tab.b_actf = bp->b_actf; + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("sw_reg_start: bp %p vp %p blkno %x addr %p cnt %lx\n", + bp, bp->b_vp, bp->b_blkno,bp->b_data, bp->b_bcount); +#endif + if ((bp->b_flags & B_READ) == 0) + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); +} + +/* + * b_iodone routine installed on each vndbuf by sw_reg_strategy. With + * splbio held it subtracts the bytes moved (b_bcount - b_resid) from the + * b_resid of the parent buffer, decrements vx_pending, copies a non-zero + * b_error into vx_error, calls brelvp if b_vp is set and returns the + * vndbuf with putvndbuf. The parent is completed with biodone once its + * b_resid is 0, or once an error is recorded and nothing is pending. + * Afterwards the next queued buffer is started, or swd_tab.b_active is + * decremented when the queue is empty. + * + * NOTE(review): the cast of bp to struct vndbuf * assumes vb_buf is the + * first member of struct vndbuf -- confirm against its declaration. + * NOTE(review): a chunk that ends short (b_resid != 0) with b_error == 0 + * meets neither completion condition -- confirm that cannot happen. + */ +static void +sw_reg_iodone(bp) + struct buf *bp; +{ + register struct vndbuf *vbp = (struct vndbuf *) bp; + register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; + register struct buf *pbp = vnx->vx_bp; + struct swapdev *sdp = vnx->vx_sdp; + int s, resid; + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("sw_reg_iodone: vbp %p vp %p blkno %x addr %p " + "cnt %lx(%lx)\n", + vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, + vbp->vb_buf.b_data, vbp->vb_buf.b_bcount, + vbp->vb_buf.b_resid); +#endif /* SWAPDEBUG */ + + s = splbio(); + resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; + pbp->b_resid -= resid; + vnx->vx_pending--; + + if (vbp->vb_buf.b_error) { +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("sw_reg_iodone: vbp %p error %d\n", vbp, + vbp->vb_buf.b_error); +#endif /* SWAPDEBUG */ + + vnx->vx_error = vbp->vb_buf.b_error; + } + + if (vbp->vb_buf.b_vp != NULLVP) + brelvp(&vbp->vb_buf); + + putvndbuf(vbp); + + /* + * Wrap up this transaction if it has run to completion or, in + * case of an error, when all auxiliary buffers have returned. 
+ */ + if (pbp->b_resid == 0 || (vnx->vx_error && vnx->vx_pending == 0)) { + + if (vnx->vx_error != 0) { + pbp->b_flags |= B_ERROR; + pbp->b_error = vnx->vx_error; + } + putvndxfer(vnx); +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWFLOW) + printf("swiodone: pbp %p iodone\n", pbp); +#endif + biodone(pbp); + } + + if (sdp->swd_tab.b_actf) + sw_reg_start(sdp); + else + sdp->swd_tab.b_active--; + + splx(s); +} +#endif /* SWAP_TO_FILES */ + +/* + * Swap setup: clears nswap and nswapdev, initializes the swap_priority + * list, creates the swapmap extent over [1, INT_MAX] (panicking if + * extent_create returns 0), and links the nswbuf headers in swbuf into a + * list headed by bswlist.b_actf, giving each the credentials of proc0 + * and setting b_vnbufs.le_next to NOLIST. + * + * NOTE(review): the last header is initialized outside the loop without + * checking nswbuf, so this presumably relies on nswbuf >= 1 -- confirm. + */ +void +swapinit() +{ + struct buf *sp = swbuf; + struct proc *p = &proc0; /* XXX */ + int i; + +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWINIT) + printf("swapinit\n"); +#endif + nswap = 0; + nswapdev = 0; + + LIST_INIT(&swap_priority); + + /* + * Create swap block resource map. The range [1..INT_MAX] allows + * for a grand total of 2 gigablocks of swap resource. + * (start at 1 because "block #0" will be interpreted as + * an allocation failure). + */ + swapmap = extent_create("swapmap", 1, INT_MAX, + M_VMSWAP, 0, 0, EX_WAITOK); + if (swapmap == 0) + panic("swapinit: extent_create failed"); + + /* + * Now set up swap buffer headers. + */ + bswlist.b_actf = sp; + for (i = 0; i < nswbuf - 1; i++, sp++) { + sp->b_actf = sp + 1; + sp->b_rcred = sp->b_wcred = p->p_ucred; + sp->b_vnbufs.le_next = NOLIST; + } + sp->b_rcred = sp->b_wcred = p->p_ucred; + sp->b_vnbufs.le_next = NOLIST; + sp->b_actf = NULL; +#ifdef SWAPDEBUG + if (vmswapdebug & VMSDB_SWINIT) + printf("leaving swapinit\n"); +#endif } diff --git a/sys/vm/vm_swap.h b/sys/vm/vm_swap.h new file mode 100644 index 000000000000..80b9894247f2 --- /dev/null +++ b/sys/vm/vm_swap.h @@ -0,0 +1,63 @@ +/* $NetBSD: vm_swap.h,v 1.2 1997/06/12 14:51:25 mrg Exp $ */ + +/* + * Copyright (c) 1995, 1996 Matthew R. Green + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Matthew R. Green for + * The NetBSD Foundation. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _VM_VM_SWAP_H_ +#define _VM_VM_SWAP_H_ + +/* These structures are used to return swap information for userland */ +/* NOTE(review): field meanings are not visible from this header alone; se_flags presumably holds the SWF_* bits below -- confirm against vm_swap.c */ +struct swapent { + dev_t se_dev; + int se_flags; + int se_nblks; + int se_inuse; + int se_priority; +}; + +/* presumably the command codes accepted by sys_swapctl -- TODO confirm */ +#define SWAP_ON 1 +#define SWAP_OFF 2 +#define SWAP_NSWAP 3 +#define SWAP_STATS 4 +#define SWAP_CTL 5 + +/* presumably bit flags for swapent.se_flags -- TODO confirm */ +#define SWF_INUSE 0x00000001 +#define SWF_ENABLE 0x00000002 + +/* the prototypes below are declared only when _KERNEL is defined */ +#ifdef _KERNEL +int sys_swapctl __P((struct proc *, void *, register_t *)); +daddr_t swap_alloc __P((int size)); +void swap_free __P((int size, daddr_t addr)); +void swapinit __P((void)); +#endif + +#endif /* _VM_VM_SWAP_H_ */