From b397c875ae91c0eea34205c51de1153a7d8510b6 Mon Sep 17 00:00:00 2001 From: perseant Date: Mon, 17 Feb 2003 23:48:08 +0000 Subject: [PATCH] Add code to UBCify LFS. This is still behind "#ifdef LFS_UBC" for now (there are still some details to work out) but expect that to go away soon. To support these basic changes (creation of lfs_putpages, lfs_gop_write, mods to lfs_balloc) several other changes were made, to wit: * Create a writer daemon kernel thread whose purpose is to handle page writes for the pagedaemon, but which also takes over some of the functions of lfs_check(). This thread is started the first time an LFS is mounted. * Add a "flags" parameter to GOP_SIZE. Current values are GOP_SIZE_READ, meaning that the call should return the size of the in-core version of the file, and GOP_SIZE_WRITE, meaning that it should return the on-disk size. One of GOP_SIZE_READ or GOP_SIZE_WRITE must be specified. * Instead of using malloc(...M_WAITOK) for everything, reserve enough resources to get by and use malloc(...M_NOWAIT), using the reserves if necessary. Use the pool subsystem for structures small enough that this is feasible. This also obsoletes LFS_THROTTLE. And a few that are not strictly necessary: * Moves the LFS inode extensions off onto a separately allocated structure; getting closer to LFS as an LKM. "Welcome to 1.6O." * Unified GOP_ALLOC between FFS and LFS. * Update LFS copyright headers to correct values. * Actually cast to unsigned in lfs_shellsort, like the comment says. * Keep track of which segments were empty before the previous checkpoint; any segments that pass two checkpoints both dirty and empty can be summarily cleaned. Do this. Right now lfs_segclean still works, but this should be turned into an effectless compatibility syscall. 
--- libexec/lfs_cleanerd/print.c | 6 +- sbin/fsck_lfs/inode.c | 10 +- sbin/fsck_lfs/pass0.c | 10 +- sys/conf/osrelease.sh | 4 +- sys/miscfs/genfs/genfs_node.h | 14 +- sys/miscfs/genfs/genfs_vnops.c | 19 +- sys/nfs/nfs_node.c | 11 +- sys/sys/param.h | 4 +- sys/ufs/ffs/ffs_extern.h | 4 +- sys/ufs/ffs/ffs_vfsops.c | 6 +- sys/ufs/ffs/ffs_vnops.c | 10 +- sys/ufs/lfs/TODO | 23 +- sys/ufs/lfs/lfs.h | 122 +++++- sys/ufs/lfs/lfs_alloc.c | 75 +--- sys/ufs/lfs/lfs_balloc.c | 120 ++--- sys/ufs/lfs/lfs_bio.c | 128 +++--- sys/ufs/lfs/lfs_cksum.c | 6 +- sys/ufs/lfs/lfs_debug.c | 8 +- sys/ufs/lfs/lfs_extern.h | 37 +- sys/ufs/lfs/lfs_inode.c | 108 +++-- sys/ufs/lfs/lfs_segment.c | 726 +++++++++++++++++++------------ sys/ufs/lfs/lfs_subr.c | 331 ++++++++++---- sys/ufs/lfs/lfs_syscalls.c | 74 ++-- sys/ufs/lfs/lfs_vfsops.c | 434 +++++++++++++++++- sys/ufs/lfs/lfs_vnops.c | 774 +++++++++++++++++++++++++++++++-- sys/ufs/ufs/inode.h | 14 +- sys/ufs/ufs/ufs_extern.h | 3 +- sys/ufs/ufs/ufs_inode.c | 8 +- sys/ufs/ufs/ufs_readwrite.c | 21 +- sys/ufs/ufs/ufs_vnops.c | 52 ++- sys/uvm/uvm_page.c | 6 +- sys/uvm/uvm_pager.h | 3 +- usr.sbin/dumplfs/dumplfs.c | 6 +- usr.sbin/quotaon/quotaon.c | 7 +- 34 files changed, 2401 insertions(+), 783 deletions(-) diff --git a/libexec/lfs_cleanerd/print.c b/libexec/lfs_cleanerd/print.c index 60f691707abd..9097e83975c4 100644 --- a/libexec/lfs_cleanerd/print.c +++ b/libexec/lfs_cleanerd/print.c @@ -1,4 +1,4 @@ -/* $NetBSD: print.c,v 1.13 2003/01/24 21:55:05 fvdl Exp $ */ +/* $NetBSD: print.c,v 1.14 2003/02/17 23:48:08 perseant Exp $ */ /*- * Copyright (c) 1992, 1993 @@ -38,7 +38,7 @@ #if 0 static char sccsid[] = "from: @(#)print.c 8.1 (Berkeley) 6/4/93"; #else -__RCSID("$NetBSD: print.c,v 1.13 2003/01/24 21:55:05 fvdl Exp $"); +__RCSID("$NetBSD: print.c,v 1.14 2003/02/17 23:48:08 perseant Exp $"); #endif #endif /* not lint */ @@ -261,7 +261,7 @@ dump_super(struct lfs *lfsp) syslog(LOG_DEBUG, "Checkpoint Info\n"); syslog(LOG_DEBUG, "%s%d\t%s0x%X\t%s%d\n", - 
"free ", lfsp->lfs_free, + "freehd ", lfsp->lfs_freehd, "idaddr ", lfsp->lfs_idaddr, "ifile ", lfsp->lfs_ifile); syslog(LOG_DEBUG, "%s%d\t%s%d\t%s%d\n", diff --git a/sbin/fsck_lfs/inode.c b/sbin/fsck_lfs/inode.c index 6bdd1253fe9f..32572a5bc51f 100644 --- a/sbin/fsck_lfs/inode.c +++ b/sbin/fsck_lfs/inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: inode.c,v 1.14 2003/01/24 21:55:10 fvdl Exp $ */ +/* $NetBSD: inode.c,v 1.15 2003/02/17 23:48:09 perseant Exp $ */ /* * Copyright (c) 1997, 1998 @@ -348,8 +348,8 @@ lfs_ginode(ino_t inumber) if (reply("free")) { ifp = lfs_ientry(inumber, &bp); ifp->if_daddr = LFS_UNUSED_DADDR; - ifp->if_nextfree = sblock.lfs_free; - sblock.lfs_free = inumber; + ifp->if_nextfree = sblock.lfs_freehd; + sblock.lfs_freehd = inumber; sbdirty(); dirty(bp); bp->b_flags &= ~B_INUSE; @@ -700,8 +700,8 @@ clri(struct inodesc *idesc, char *type, int flag) ifp = lfs_ientry(idesc->id_number, &bp); ifp->if_daddr = LFS_UNUSED_DADDR; - ifp->if_nextfree = sblock.lfs_free; - sblock.lfs_free = idesc->id_number; + ifp->if_nextfree = sblock.lfs_freehd; + sblock.lfs_freehd = idesc->id_number; sbdirty(); dirty(bp); bp->b_flags &= ~B_INUSE; diff --git a/sbin/fsck_lfs/pass0.c b/sbin/fsck_lfs/pass0.c index 30913dfedd0f..d4d7d1661853 100644 --- a/sbin/fsck_lfs/pass0.c +++ b/sbin/fsck_lfs/pass0.c @@ -1,4 +1,4 @@ -/* $NetBSD: pass0.c,v 1.12 2003/01/24 21:55:10 fvdl Exp $ */ +/* $NetBSD: pass0.c,v 1.13 2003/02/17 23:48:09 perseant Exp $ */ /* * Copyright (c) 1998 Konrad E. Schroder. @@ -86,7 +86,7 @@ pass0() memset(visited, 0, maxino * sizeof(ino_t)); plastino = 0; - ino = sblock.lfs_free; + ino = sblock.lfs_freehd; while (ino) { if (ino >= maxino) { printf("! Ino %d out of range (last was %d)\n", ino, @@ -115,7 +115,7 @@ pass0() ino, (long long)daddr); if (preen || reply("FIX") == 1) { if (plastino == 0) { - sblock.lfs_free = nextino; + sblock.lfs_freehd = nextino; sbdirty(); } else { ifp = lfs_ientry(plastino, &bp); @@ -145,8 +145,8 @@ pass0() pwarn("! 
Ino %d free, but not on the free list\n", ino); if (preen || reply("FIX") == 1) { - ifp->if_nextfree = sblock.lfs_free; - sblock.lfs_free = ino; + ifp->if_nextfree = sblock.lfs_freehd; + sblock.lfs_freehd = ino; sbdirty(); dirty(bp); } diff --git a/sys/conf/osrelease.sh b/sys/conf/osrelease.sh index 31bfb77fc870..fc247da50614 100644 --- a/sys/conf/osrelease.sh +++ b/sys/conf/osrelease.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# $NetBSD: osrelease.sh,v 1.90 2003/02/01 06:26:30 thorpej Exp $ +# $NetBSD: osrelease.sh,v 1.91 2003/02/17 23:48:09 perseant Exp $ # # Copyright (c) 1997 The NetBSD Foundation, Inc. # All rights reserved. @@ -42,7 +42,7 @@ # sys/sys/param.h: __NetBSD_Version__ # share/tmac/doc-common: ds oS # -release=1.6N +release=1.6O case $1 in -s) diff --git a/sys/miscfs/genfs/genfs_node.h b/sys/miscfs/genfs/genfs_node.h index da1f1afca5e6..19d05c17fa16 100644 --- a/sys/miscfs/genfs/genfs_node.h +++ b/sys/miscfs/genfs/genfs_node.h @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_node.h,v 1.3 2001/12/18 07:49:36 chs Exp $ */ +/* $NetBSD: genfs_node.h,v 1.4 2003/02/17 23:48:10 perseant Exp $ */ /* * Copyright (c) 2001 Chuck Silvers. 
@@ -36,18 +36,22 @@ struct vm_page; struct genfs_ops { - void (*gop_size)(struct vnode *, off_t, off_t *); + void (*gop_size)(struct vnode *, off_t, off_t *, int); int (*gop_alloc)(struct vnode *, off_t, off_t, int, struct ucred *); int (*gop_write)(struct vnode *, struct vm_page **, int, int); }; -#define GOP_SIZE(vp, size, eobp) \ - (*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp)) +#define GOP_SIZE(vp, size, eobp, flags) \ + (*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp), (flags)) #define GOP_ALLOC(vp, off, len, flags, cred) \ (*VTOG(vp)->g_op->gop_alloc)((vp), (off), (len), (flags), (cred)) #define GOP_WRITE(vp, pgs, npages, flags) \ (*VTOG(vp)->g_op->gop_write)((vp), (pgs), (npages), (flags)) +/* Flags to GOP_SIZE */ +#define GOP_SIZE_READ 0x1 /* Advise how many pages to read/create */ +#define GOP_SIZE_WRITE 0x2 /* Tell how many pages to write */ + struct genfs_node { struct genfs_ops *g_op; /* ops vector */ struct lock g_glock; /* getpages lock */ @@ -55,7 +59,7 @@ struct genfs_node { #define VTOG(vp) ((struct genfs_node *)(vp)->v_data) -void genfs_size(struct vnode *, off_t, off_t *); +void genfs_size(struct vnode *, off_t, off_t *, int); void genfs_node_init(struct vnode *, struct genfs_ops *); int genfs_gop_write(struct vnode *, struct vm_page **, int, int); int genfs_compat_gop_write(struct vnode *, struct vm_page **, int, int); diff --git a/sys/miscfs/genfs/genfs_vnops.c b/sys/miscfs/genfs/genfs_vnops.c index 3b62dbe01169..baf3220869c3 100644 --- a/sys/miscfs/genfs/genfs_vnops.c +++ b/sys/miscfs/genfs/genfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: genfs_vnops.c,v 1.71 2003/02/05 21:38:42 pk Exp $ */ +/* $NetBSD: genfs_vnops.c,v 1.72 2003/02/17 23:48:11 perseant Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -35,7 +35,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.71 2003/02/05 21:38:42 pk Exp $"); +__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.72 2003/02/17 23:48:11 perseant Exp $"); #include "opt_nfsserver.h" @@ -495,11 +495,11 @@ 
genfs_getpages(void *v) error = 0; origoffset = ap->a_offset; orignpages = *ap->a_count; - GOP_SIZE(vp, vp->v_size, &diskeof); + GOP_SIZE(vp, vp->v_size, &diskeof, GOP_SIZE_READ); if (flags & PGO_PASTEOF) { newsize = MAX(vp->v_size, origoffset + (orignpages << PAGE_SHIFT)); - GOP_SIZE(vp, newsize, &memeof); + GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_READ); } else { memeof = diskeof; } @@ -1139,8 +1139,13 @@ genfs_putpages(void *v) yield = (l->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) && !pagedaemon; if (pg->flags & PG_BUSY || yield) { - KASSERT(!pagedaemon); UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0); + if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) { + UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0); + error = EDEADLK; + break; + } + KASSERT(!pagedaemon); if (by_list) { TAILQ_INSERT_BEFORE(pg, &curmp, listq); UVMHIST_LOG(ubchist, "curmp next %p", @@ -1381,7 +1386,7 @@ genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags) UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x", vp, pgs, npages, flags); - GOP_SIZE(vp, vp->v_size, &eof); + GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_WRITE); if (vp->v_type == VREG) { fs_bshift = vp->v_mount->mnt_fs_bshift; dev_bshift = vp->v_mount->mnt_dev_bshift; @@ -1523,7 +1528,7 @@ genfs_node_init(struct vnode *vp, struct genfs_ops *ops) } void -genfs_size(struct vnode *vp, off_t size, off_t *eobp) +genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags) { int bsize; diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c index a78de26dfb4c..1a93a45602ef 100644 --- a/sys/nfs/nfs_node.c +++ b/sys/nfs/nfs_node.c @@ -1,4 +1,4 @@ -/* $NetBSD: nfs_node.c,v 1.60 2003/02/15 18:00:25 drochner Exp $ */ +/* $NetBSD: nfs_node.c,v 1.61 2003/02/17 23:48:12 perseant Exp $ */ /* * Copyright (c) 1989, 1993 @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.60 2003/02/15 18:00:25 drochner Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nfs_node.c,v 1.61 2003/02/17 23:48:12 perseant Exp $"); #include 
"opt_nfs.h" @@ -80,7 +80,7 @@ extern int prtactive; #define nfs_hash(x,y) hash32_buf((x), (y), HASH32_BUF_INIT) -void nfs_gop_size(struct vnode *, off_t, off_t *); +void nfs_gop_size(struct vnode *, off_t, off_t *, int); int nfs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *); int nfs_gop_write(struct vnode *, struct vm_page **, int, int); @@ -315,8 +315,11 @@ nfs_reclaim(v) } void -nfs_gop_size(struct vnode *vp, off_t size, off_t *eobp) +nfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) { + KASSERT(flags & (GOP_SIZE_READ | GOP_SIZE_WRITE)); + KASSERT((flags & (GOP_SIZE_READ | GOP_SIZE_WRITE)) + != (GOP_SIZE_READ | GOP_SIZE_WRITE)); *eobp = MAX(size, vp->v_size); } diff --git a/sys/sys/param.h b/sys/sys/param.h index a659e5cc7948..808916eb4df4 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $NetBSD: param.h,v 1.159 2003/02/01 06:26:30 thorpej Exp $ */ +/* $NetBSD: param.h,v 1.160 2003/02/17 23:48:13 perseant Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1993 @@ -67,7 +67,7 @@ * Don't forget to change conf/osrelease.sh too. 
*/ -#define __NetBSD_Version__ 106140000 /* NetBSD 1.6N */ +#define __NetBSD_Version__ 106150000 /* NetBSD 1.6O */ /* * Historical NetBSD #define diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index d3f0722fb020..4cf6f99967ec 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_extern.h,v 1.25 2003/01/24 21:55:22 fvdl Exp $ */ +/* $NetBSD: ffs_extern.h,v 1.26 2003/02/17 23:48:14 perseant Exp $ */ /*- * Copyright (c) 1991, 1993, 1994 @@ -151,7 +151,7 @@ int ffs_fsync __P((void *)); int ffs_reclaim __P((void *)); int ffs_getpages __P((void *)); int ffs_putpages __P((void *)); -void ffs_gop_size __P((struct vnode *, off_t, off_t *)); +void ffs_gop_size __P((struct vnode *, off_t, off_t *, int)); __END_DECLS diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index c29089ef5428..e0a368eca950 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vfsops.c,v 1.106 2003/01/24 21:55:23 fvdl Exp $ */ +/* $NetBSD: ffs_vfsops.c,v 1.107 2003/02/17 23:48:14 perseant Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1994 @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.106 2003/01/24 21:55:23 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.107 2003/02/17 23:48:14 perseant Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -117,7 +117,7 @@ struct vfsops ffs_vfsops = { struct genfs_ops ffs_genfsops = { ffs_gop_size, - ffs_gop_alloc, + ufs_gop_alloc, genfs_gop_write, }; diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 74b4a5d7995a..e845ed874cdd 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vnops.c,v 1.54 2003/02/05 21:38:44 pk Exp $ */ +/* $NetBSD: ffs_vnops.c,v 1.55 2003/02/17 23:48:15 perseant Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.54 2003/02/05 21:38:44 
pk Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.55 2003/02/17 23:48:15 perseant Exp $"); #include #include @@ -567,12 +567,16 @@ ffs_putpages(void *v) */ void -ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp) +ffs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) { struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; daddr_t olbn, nlbn; + KASSERT(flags & (GOP_SIZE_READ | GOP_SIZE_WRITE)); + KASSERT((flags & (GOP_SIZE_READ | GOP_SIZE_WRITE)) + != (GOP_SIZE_READ | GOP_SIZE_WRITE)); + olbn = lblkno(fs, ip->i_ffs_size); nlbn = lblkno(fs, size); if (nlbn < NDADDR && olbn <= nlbn) { diff --git a/sys/ufs/lfs/TODO b/sys/ufs/lfs/TODO index c60e2b69c4a7..b968cd5834f9 100644 --- a/sys/ufs/lfs/TODO +++ b/sys/ufs/lfs/TODO @@ -1,4 +1,19 @@ -# $NetBSD: TODO,v 1.5 2001/07/13 20:30:22 perseant Exp $ +# $NetBSD: TODO,v 1.6 2003/02/17 23:48:16 perseant Exp $ + +- Lock audit. Need to check locking for multiprocessor case in particular. + +- Get rid of the syscalls: make them into ioctl calls instead. This would + allow LFS to be loaded as a module. We would then ideally have an + in-kernel cleaner that runs if no userland cleaner has asserted itself. + +- Get rid of lfs_segclean(); the kernel should clean a dirty segment IFF it + has passed two checkpoints containing zero live bytes. + +- Now that our cache is basically all of physical memory, we need to make + sure that segwrite is not starving other important things. Need a way + to prioritize which blocks are most important to write, and write only + those before giving up the seglock to do the rest. How does this change + our notion of what a checkpoint is? - Investigate alternate inode locking strategy: Inode locks are useful for locking against simultaneous changes to inode size (balloc, @@ -11,12 +26,6 @@ - Fully working fsck_lfs. (Really, need a general-purpose external partial-segment writer.) 
-- Inode blocks are currently the same size as the fs block size; but all - the ones I've seen are mostly empty, and this will be especially true - if atime information is kept in the ifile instead of the inode. Could - we shrink the inode block size to DEV_BSIZE? Or parametrize it at fs - creation time? - - Get rid of DEV_BSIZE, pay attention to the media block size at mount time. - More fs ops need to call lfs_imtime. Which ones? (Blackwell et al., 1995) diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h index 989773499039..94fe31443506 100644 --- a/sys/ufs/lfs/lfs.h +++ b/sys/ufs/lfs/lfs.h @@ -1,7 +1,7 @@ -/* $NetBSD: lfs.h,v 1.45 2003/01/29 13:14:33 yamt Exp $ */ +/* $NetBSD: lfs.h,v 1.46 2003/02/17 23:48:16 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -95,11 +95,44 @@ #define BW_CLEAN 1 #define MIN_FREE_SEGS 2 #define LFS_MAX_ACTIVE 10 -#define LFS_MAXDIROP (desiredvnodes >> 2) #ifndef LFS_ATIME_IFILE # define LFS_ATIME_IFILE 0 #endif +/* Local definition for LFS's usage of PG_PAGER1 */ +#define PG_DELWRI PG_PAGER1 + +/* Types for lfs_newbuf and lfs_malloc */ +#define LFS_NB_UNKNOWN -1 +#define LFS_NB_SUMMARY 0 +#define LFS_NB_SBLOCK 1 +#define LFS_NB_IBLOCK 2 +#define LFS_NB_CLUSTER 3 +#define LFS_NB_CLEAN 4 +#define LFS_NB_COUNT 5 /* always last */ + +/* Number of reserved memory blocks of each type */ +#define LFS_N_SUMMARIES 2 +#define LFS_N_SBLOCKS 1 /* Always 1, to throttle superblock writes */ +#define LFS_N_IBLOCKS 16 /* In theory ssize/bsize; in practice around 2 */ +#define LFS_N_CLUSTERS 16 /* In theory ssize/MAXPHYS */ +#define LFS_N_CLEAN 0 + +/* Total count of "large" (non-pool) types */ +#define LFS_N_TOTAL (LFS_N_SUMMARIES + LFS_N_SBLOCKS + LFS_N_IBLOCKS + LFS_N_CLUSTERS + LFS_N_CLEAN) + +/* Counts for pool types */ +#define LFS_N_CL 
LFS_N_CLUSTERS +#define LFS_N_BPP 2 +#define LFS_N_SEG 2 + +/* Structure to keep reserved blocks */ +typedef struct lfs_res_blk { + void *p; + LIST_ENTRY(lfs_res_blk) res; + char inuse; +} res_t; + /* * #define WRITE_THRESHHOLD ((nbuf >> 1) - 10) * #define WAIT_THRESHHOLD (nbuf - (nbuf >> 2) - 10) @@ -109,8 +142,17 @@ /* These are new ... is LFS taking up too much memory in its buffers? */ #define LFS_MAX_BYTES (((bufpages >> 2) - 10) * NBPG) #define LFS_WAIT_BYTES (((bufpages >> 1) - (bufpages >> 3) - 10) * NBPG) +#define LFS_MAX_DIROP ((desiredvnodes >> 2) + (desiredvnodes >> 3)) #define LFS_BUFWAIT 2 +#define LFS_MAX_PAGES \ + (((uvmexp.active + uvmexp.inactive + uvmexp.free) * uvmexp.filemin) >> 8) +#define LFS_WAIT_PAGES \ + (((uvmexp.active + uvmexp.inactive + uvmexp.free) * uvmexp.filemax) >> 8) + +#define LFS_IS_MALLOC_BUF(bp) (((bp)->b_flags & B_CALL) && \ + ((bp)->b_iodone == lfs_callback || (bp)->b_iodone == lfs_fakebuf_iodone)) + #define LFS_LOCK_BUF(bp) do { \ if (((bp)->b_flags & (B_LOCKED | B_CALL)) == 0) { \ ++locked_queue_count; \ @@ -237,7 +279,21 @@ extern struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } while (0) -#define WRITEINPROG(vp) (vp->v_dirtyblkhd.lh_first && !(VTOI(vp)->i_flag & \ +/* + * How to find out whether a vnode had dirty buffers or pages, + * to know whether it needs to retain IN_MODIFIED after a write. + */ +#ifdef LFS_UBC +int lfs_checkifempty(struct vnode *); +# define VPISEMPTY(vp) lfs_checkifempty(vp) +#else +# define VPISEMPTY(vp) ((vp)->v_dirtyblkhd.lh_first == NULL) +#endif +/* + * WRITEINPROG does not use VPISEMPTY because any dirty pages will + * have been given buffer headers, if they are "in progress". 
+ */ +#define WRITEINPROG(vp) ((vp)->v_dirtyblkhd.lh_first && !(VTOI(vp)->i_flag & \ (IN_MODIFIED | IN_ACCESSED | IN_CLEANING))) /* Here begins the berkeley code */ @@ -257,6 +313,7 @@ struct segusage { #define SEGUSE_DIRTY 0x02 /* segment has data in it */ #define SEGUSE_SUPERBLOCK 0x04 /* segment contains a superblock */ #define SEGUSE_ERROR 0x08 /* cleaner: do not clean segment */ +#define SEGUSE_EMPTY 0x10 /* segment is empty */ u_int32_t su_flags; /* 12: segment flags */ u_int64_t su_lastmod; /* 16: last modified timestamp */ }; @@ -304,7 +361,7 @@ struct dlfs { u_int32_t dlfs_frag; /* 28: number of frags in a block in fs */ /* Checkpoint region. */ - u_int32_t dlfs_free; /* 32: start of the free list */ + u_int32_t dlfs_freehd; /* 32: start of the free list */ u_int32_t dlfs_bfree; /* 36: number of free disk blocks */ u_int32_t dlfs_nfiles; /* 40: number of allocated inodes */ int32_t dlfs_avail; /* 44: blocks available for writing */ @@ -371,9 +428,6 @@ struct dlfs { u_int32_t dlfs_cksum; /* 508: checksum for superblock checking */ }; -/* Maximum number of io's we can have pending at once */ -#define LFS_THROTTLE 32 /* XXX should be better paramtrized - ? */ - /* In-memory super block. 
*/ struct lfs { struct dlfs lfs_dlfs; /* on-disk parameters */ @@ -385,7 +439,7 @@ struct lfs { #define lfs_bsize lfs_dlfs.dlfs_bsize #define lfs_fsize lfs_dlfs.dlfs_fsize #define lfs_frag lfs_dlfs.dlfs_frag -#define lfs_free lfs_dlfs.dlfs_free +#define lfs_freehd lfs_dlfs.dlfs_freehd #define lfs_bfree lfs_dlfs.dlfs_bfree #define lfs_nfiles lfs_dlfs.dlfs_nfiles #define lfs_avail lfs_dlfs.dlfs_avail @@ -455,20 +509,26 @@ struct lfs { #define LFS_WARNED 0x04 int8_t lfs_flags; /* currently unused flag */ u_int16_t lfs_activesb; /* toggle between superblocks */ -#ifdef LFS_TRACK_IOS - daddr_t lfs_pending[LFS_THROTTLE]; /* daddrs of pending writes */ -#endif /* LFS_TRACK_IOS */ daddr_t lfs_sbactive; /* disk address of in-progress sb write */ struct vnode *lfs_flushvp; /* vnode being flushed */ struct vnode *lfs_unlockvp; /* being inactivated in lfs_segunlock */ u_int32_t lfs_diropwait; /* # procs waiting on dirop flush */ size_t lfs_devbsize; /* Device block size */ size_t lfs_devbshift; /* Device block shift */ - struct lock lfs_freelock; struct lock lfs_fraglock; pid_t lfs_rfpid; /* Process ID of roll-forward agent */ int lfs_nadirop; /* number of active dirop nodes */ long lfs_ravail; /* blocks pre-reserved for writing */ + res_t *lfs_resblk; /* Reserved memory for pageout */ + TAILQ_HEAD(, inode) lfs_dchainhd; /* dirop vnodes */ + TAILQ_HEAD(, inode) lfs_pchainhd; /* paging vnodes */ +#define LFS_RESHASH_WIDTH 17 + LIST_HEAD(, lfs_res_blk) lfs_reshash[LFS_RESHASH_WIDTH]; + int lfs_pdflush; /* pagedaemon wants us to flush */ + u_int32_t **lfs_suflags; /* Segment use flags */ + struct pool lfs_clpool; /* Pool for struct lfs_cluster */ + struct pool lfs_bpppool; /* Pool for bpp */ + struct pool lfs_segpool; /* Pool for struct segment */ }; /* @@ -659,14 +719,14 @@ struct segsum { #define LFS_GET_HEADFREE(FS, CIP, BP, FREEP) do { \ if ((FS)->lfs_version > 1) { \ LFS_CLEANERINFO((CIP), (FS), (BP)); \ - (FS)->lfs_free = (CIP)->free_head; \ + (FS)->lfs_freehd = 
(CIP)->free_head; \ brelse(BP); \ } \ - *(FREEP) = (FS)->lfs_free; \ + *(FREEP) = (FS)->lfs_freehd; \ } while (0) #define LFS_PUT_HEADFREE(FS, CIP, BP, VAL) do { \ - (FS)->lfs_free = (VAL); \ + (FS)->lfs_freehd = (VAL); \ if ((FS)->lfs_version > 1) { \ LFS_CLEANERINFO((CIP), (FS), (BP)); \ (CIP)->free_head = (VAL); \ @@ -721,6 +781,15 @@ struct segsum { (SP) = (SEGUSE *)(BP)->b_data + ((IN) % (F)->lfs_sepb); \ } while(0) +#define LFS_WRITESEGENTRY(SP, F, IN, BP) do { \ + if ((SP)->su_nbytes == 0) \ + (SP)->su_flags |= SEGUSE_EMPTY; \ + else \ + (SP)->su_flags &= ~SEGUSE_EMPTY; \ + (F)->lfs_suflags[(F)->lfs_activesb][(IN)] = (SP)->su_flags; \ + LFS_BWRITE_LOG(BP); \ +} while(0) + /* Determine if a buffer belongs to the ifile */ #define IS_IFILE(bp) (VTOI(bp->b_vp)->i_number == LFS_IFILE_INUM) @@ -773,15 +842,16 @@ struct segment { #define SEGM_CLEAN 0x02 /* cleaner call; don't sort */ #define SEGM_SYNC 0x04 /* wait for segment */ #define SEGM_PROT 0x08 /* don't inactivate at segunlock */ +#define SEGM_PAGEDAEMON 0x10 /* pagedaemon called us */ u_int16_t seg_flags; /* run-time flags for this segment */ u_int32_t seg_iocount; /* number of ios pending */ int ndupino; /* number of duplicate inodes */ }; struct lfs_cluster { + size_t bufsize; /* Size of kept data */ struct buf **bpp; /* Array of kept buffers */ int bufcount; /* Number of kept buffers */ - size_t bufsize; /* Size of kept data */ #define LFS_CL_MALLOC 0x00000001 #define LFS_CL_SHIFT 0x00000002 #define LFS_CL_SYNC 0x00000004 @@ -789,9 +859,25 @@ struct lfs_cluster { struct lfs *fs; /* LFS that this belongs to */ struct segment *seg; /* Segment structure, for LFS_CL_SYNC */ void *saveaddr; /* Original contents of saveaddr */ - char *olddata; /* Original b_data, if LFS_CL_MALLOC */ + char *olddata; /* Original b_data, if LFS_CL_MALLOC */ }; +/* + * LFS inode extensions; moved from so that file didn't + * have to change every time LFS changed. 
+ */ +struct lfs_inode_ext { + off_t lfs_osize; /* size of file on disk */ + u_int32_t lfs_effnblocks; /* number of blocks when i/o completes */ + size_t lfs_fragsize[NDADDR]; /* size of on-disk direct blocks */ + TAILQ_ENTRY(inode) lfs_dchain; /* Dirop chain. */ + TAILQ_ENTRY(inode) lfs_pchain; /* Paging chain. */ +}; +#define i_lfs_osize inode_ext.lfs->lfs_osize +#define i_lfs_effnblks inode_ext.lfs->lfs_effnblocks +#define i_lfs_fragsize inode_ext.lfs->lfs_fragsize +#define i_lfs_dchain inode_ext.lfs->lfs_dchain + /* * Macros for determining free space on the disk, with the variable metadata * of segment summaries and inode blocks taken into account. diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c index 31a4a538c6d4..bfc30918afb2 100644 --- a/sys/ufs/lfs/lfs_alloc.c +++ b/sys/ufs/lfs/lfs_alloc.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $ */ +/* $NetBSD: lfs_alloc.c,v 1.63 2003/02/17 23:48:16 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.63 2003/02/17 23:48:16 perseant Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -85,7 +85,6 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $"); #include #include #include -#include #include #include @@ -99,6 +98,8 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.62 2003/01/27 23:17:56 yamt Exp $"); extern int lfs_dirvcount; extern struct lock ufs_hashlock; +extern struct simplelock lfs_subsys_lock; +extern int lfs_subsys_pages; static int extend_ifile(struct lfs *, struct ucred *); static int lfs_ialloc(struct lfs *, struct vnode *, ino_t, int, struct vnode **); @@ -207,6 +208,7 @@ lfs_rf_valloc(struct lfs *fs, ino_t ino, int version, struct proc *p, (void)lfs_vunref(vp); --lfs_dirvcount; vp->v_flag &= ~VDIROP; + TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); --fs->lfs_nadirop; ip->i_flag &= ~IN_ADIROP; } @@ -245,7 +247,7 @@ extend_ifile(struct lfs *fs, struct ucred *cred) LFS_GET_HEADFREE(fs, cip, cbp, &oldlast); LFS_PUT_HEADFREE(fs, cip, cbp, i); #ifdef DIAGNOSTIC - if (fs->lfs_free == LFS_UNUSED_INUM) + if (fs->lfs_freehd == LFS_UNUSED_INUM) panic("inode 0 allocated [2]"); #endif /* DIAGNOSTIC */ max = i + fs->lfs_ifpb; @@ -300,21 +302,7 @@ lfs_valloc(void *v) return EROFS; *ap->a_vpp = NULL; -#ifdef LFS_AGGRESSIVE_SEGLOCK lfs_seglock(fs, SEGM_PROT); -#else - if (fs->lfs_version == 1) { - /* - * Use lfs_seglock here, instead of fs->lfs_freelock, to - * ensure that the free list is not changed in between - * the time that the ifile blocks are written to disk - * and the time that the superblock is written to disk. - */ - lfs_seglock(fs, SEGM_PROT); - } else { - lockmgr(&fs->lfs_freelock, LK_EXCLUSIVE, 0); - } -#endif /* Get the head of the freelist. 
*/ LFS_GET_HEADFREE(fs, cip, cbp, &new_ino); @@ -345,33 +333,20 @@ lfs_valloc(void *v) brelse(bp); /* Extend IFILE so that the next lfs_valloc will succeed. */ - if (fs->lfs_free == LFS_UNUSED_INUM) { + if (fs->lfs_freehd == LFS_UNUSED_INUM) { if ((error = extend_ifile(fs, ap->a_cred)) != 0) { LFS_PUT_HEADFREE(fs, cip, cbp, new_ino); -#ifdef LFS_AGGRESSIVE_SEGLOCK lfs_segunlock(fs); -#else - if (fs->lfs_version == 1) - lfs_segunlock(fs); - else - lockmgr(&fs->lfs_freelock, LK_RELEASE, 0); -#endif return error; } } #ifdef DIAGNOSTIC - if (fs->lfs_free == LFS_UNUSED_INUM) + if (fs->lfs_freehd == LFS_UNUSED_INUM) panic("inode 0 allocated [3]"); #endif /* DIAGNOSTIC */ -#ifdef LFS_AGGRESSIVE_SEGLOCK lfs_segunlock(fs); -#else - if (fs->lfs_version == 1) - lfs_segunlock(fs); - else - lockmgr(&fs->lfs_freelock, LK_RELEASE, 0); -#endif + return lfs_ialloc(fs, ap->a_pvp, new_ino, new_gen, ap->a_vpp); } @@ -417,17 +392,16 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen, uvm_vnp_setsize(vp, 0); *vpp = vp; -#if 1 if (!(vp->v_flag & VDIROP)) { (void)lfs_vref(vp); ++lfs_dirvcount; + TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain); } vp->v_flag |= VDIROP; if (!(ip->i_flag & IN_ADIROP)) ++fs->lfs_nadirop; ip->i_flag |= IN_ADIROP; -#endif genfs_node_init(vp, &lfs_genfsops); VREF(ip->i_devvp); /* Set superblock modified bit and increment file count. */ @@ -439,17 +413,13 @@ lfs_ialloc(struct lfs *fs, struct vnode *pvp, ino_t new_ino, int new_gen, /* * Put the new inum back on the free list. */ -#ifdef LFS_AGGRESSIVE_SEGLOCK lfs_seglock(fs, SEGM_PROT); -#endif LFS_IENTRY(ifp, fs, new_ino, bp); ifp->if_daddr = LFS_UNUSED_DADDR; LFS_GET_HEADFREE(fs, cip, cbp, &(ifp->if_nextfree)); LFS_PUT_HEADFREE(fs, cip, cbp, new_ino); (void) LFS_BWRITE_LOG(bp); /* Ifile */ -#ifdef LFS_AGGRESSIVE_SEGLOCK lfs_segunlock(fs); -#endif *vpp = NULLVP; return (error); @@ -470,6 +440,7 @@ lfs_vcreate(struct mount *mp, ino_t ino, struct vnode *vp) /* Initialize the inode. 
*/ ip = pool_get(&lfs_inode_pool, PR_WAITOK); + ip->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK); vp->v_data = ip; ip->i_vnode = vp; ip->i_devvp = ump->um_devvp; @@ -487,8 +458,6 @@ lfs_vcreate(struct mount *mp, ino_t ino, struct vnode *vp) ip->i_ffs_blocks = 0; ip->i_lfs_effnblks = 0; ip->i_flag = 0; - /* Why was IN_MODIFIED ever set here? */ - /* LFS_SET_UINO(ip, IN_CHANGE | IN_MODIFIED); */ #ifdef DEBUG_LFS_VNLOCK if (ino == LFS_IFILE_INUM) @@ -531,18 +500,12 @@ lfs_vfree(void *v) tsleep(vp, (PRIBIO+1), "lfs_vfree", 0); splx(s); -#ifdef LFS_AGGRESSIVE_SEGLOCK - lfs_seglock(fs, SEGM_PROT); /* XXX */; -#else - if (fs->lfs_version == 1) - lfs_seglock(fs, SEGM_PROT); - else - lockmgr(&fs->lfs_freelock, LK_EXCLUSIVE, 0); -#endif + lfs_seglock(fs, SEGM_PROT); if (vp->v_flag & VDIROP) { --lfs_dirvcount; vp->v_flag &= ~VDIROP; + TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); wakeup(&lfs_dirvcount); lfs_vunref(vp); } @@ -597,20 +560,14 @@ lfs_vfree(void *v) } #endif sup->su_nbytes -= DINODE_SIZE; - (void) LFS_BWRITE_LOG(bp); /* Ifile */ + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, old_iaddr), bp); /* Ifile */ } /* Set superblock modified bit and decrement file count. */ fs->lfs_fmod = 1; --fs->lfs_nfiles; -#ifdef LFS_AGGRESSIVE_SEGLOCK lfs_segunlock(fs); -#else - if (fs->lfs_version == 1) - lfs_segunlock(fs); - else - lockmgr(&fs->lfs_freelock, LK_RELEASE, 0); -#endif + return (0); } diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c index b0054b2d5f6c..ed6d79343c96 100644 --- a/sys/ufs/lfs/lfs_balloc.c +++ b/sys/ufs/lfs/lfs_balloc.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_balloc.c,v 1.35 2003/01/24 21:55:26 fvdl Exp $ */ +/* $NetBSD: lfs_balloc.c,v 1.36 2003/02/17 23:48:16 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.35 2003/01/24 21:55:26 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.36 2003/02/17 23:48:16 perseant Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -96,6 +96,10 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.35 2003/01/24 21:55:26 fvdl Exp $") #include #include +#include + +extern int lfs_subsys_pages; + int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **, struct ucred *); /* @@ -127,7 +131,7 @@ lfs_balloc(void *v) int offset; u_long iosize; daddr_t daddr, idaddr; - struct buf *ibp, *bp; + struct buf *ibp, *bp, **bpp; struct inode *ip; struct lfs *fs; struct indir indirs[NIADDR+2], *idp; @@ -141,8 +145,9 @@ lfs_balloc(void *v) offset = blkoff(fs, ap->a_startoffset); iosize = ap->a_size; lbn = lblkno(fs, ap->a_startoffset); - (void)lfs_check(vp, lbn, 0); - + /* (void)lfs_check(vp, lbn, 0); */ + bpp = ap->a_bpp; + /* * Three cases: it's a block beyond the end of file, it's a block in * the file that may or may not have been assigned a disk address or @@ -159,7 +164,8 @@ lfs_balloc(void *v) * to rewrite it. */ - *ap->a_bpp = NULL; + if (bpp) + *bpp = NULL; /* Check for block beyond end of file and fragment extension needed. */ lastblock = lblkno(fs, ip->i_ffs_size); @@ -167,13 +173,15 @@ lfs_balloc(void *v) osize = blksize(fs, ip, lastblock); if (osize < fs->lfs_bsize && osize > 0) { if ((error = lfs_fragextend(vp, osize, fs->lfs_bsize, - lastblock, &bp, + lastblock, + (bpp ? 
&bp : NULL), ap->a_cred))) return (error); ip->i_ffs_size = (lastblock + 1) * fs->lfs_bsize; uvm_vnp_setsize(vp, ip->i_ffs_size); ip->i_flag |= IN_CHANGE | IN_UPDATE; - (void) VOP_BWRITE(bp); + if (bpp) + (void) VOP_BWRITE(bp); } } @@ -192,25 +200,30 @@ lfs_balloc(void *v) /* Brand new block or fragment */ frags = numfrags(fs, nsize); bb = fragstofsb(fs, frags); - *ap->a_bpp = bp = getblk(vp, lbn, nsize, 0, 0); + if (bpp) { + *ap->a_bpp = bp = getblk(vp, lbn, nsize, 0, 0); + bp->b_blkno = UNWRITTEN; + } if (ap->a_flags & B_CLRBUF) clrbuf(bp); ip->i_lfs_effnblks += bb; ip->i_lfs->lfs_bfree -= bb; - ip->i_ffs_db[lbn] = bp->b_blkno = UNWRITTEN; + ip->i_ffs_db[lbn] = UNWRITTEN; } else { if (nsize <= osize) { /* No need to extend */ - if ((error = bread(vp, lbn, osize, NOCRED, &bp))) + if (bpp && (error = bread(vp, lbn, osize, NOCRED, &bp))) return error; } else { /* Extend existing block */ if ((error = - lfs_fragextend(vp, osize, nsize, lbn, &bp, + lfs_fragextend(vp, osize, nsize, lbn, + (bpp ? &bp : NULL), ap->a_cred))) return error; } - *ap->a_bpp = bp; + if (bpp) + *bpp = bp; } return 0; } @@ -279,10 +292,11 @@ lfs_balloc(void *v) /* - * Get the existing block from the cache. + * Get the existing block from the cache, if requested. */ frags = fsbtofrags(fs, bb); - *ap->a_bpp = bp = getblk(vp, lbn, blksize(fs, ip, lbn), 0, 0); + if (bpp) + *bpp = bp = getblk(vp, lbn, blksize(fs, ip, lbn), 0, 0); /* * The block we are writing may be a brand new block @@ -293,11 +307,13 @@ lfs_balloc(void *v) * disk address UNWRITTEN. 
*/ if (daddr == UNASSIGNED) { - if (ap->a_flags & B_CLRBUF) - clrbuf(bp); + if (bpp) { + if (ap->a_flags & B_CLRBUF) + clrbuf(bp); - /* Note the new address */ - bp->b_blkno = UNWRITTEN; + /* Note the new address */ + bp->b_blkno = UNWRITTEN; + } switch (num) { case 0: @@ -316,7 +332,7 @@ lfs_balloc(void *v) ((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN; VOP_BWRITE(ibp); } - } else if (!(bp->b_flags & (B_DONE|B_DELWRI))) { + } else if (bpp && !(bp->b_flags & (B_DONE|B_DELWRI))) { /* * Not a brand new block, also not in the cache; * read it in from disk. @@ -356,26 +372,35 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * error = 0; /* - * Get the seglock so we don't enlarge blocks or change the segment - * accounting information while a segment is being written. + * Get the seglock so we don't enlarge blocks while a segment + * is being written. If we're called with bpp==NULL, though, + * we are only pretending to change a buffer, so we don't have to + * lock. */ top: -#ifdef LFS_MALLOC_SEGLOCK - lfs_seglock(fs, SEGM_PROT); -#else - lockmgr(&fs->lfs_fraglock, LK_SHARED, 0); -#endif + if (bpp) { + lockmgr(&fs->lfs_fraglock, LK_SHARED, 0); + } + if (!ISSPACE(fs, bb, cred)) { error = ENOSPC; goto out; } - if ((error = bread(vp, lbn, osize, NOCRED, bpp))) { + + /* + * If we are not asked to actually return the block, all we need + * to do is allocate space for it. UBC will handle dirtying the + * appropriate things and making sure it all goes to disk. + * Don't bother to read in that case. + */ + if (bpp && (error = bread(vp, lbn, osize, NOCRED, bpp))) { brelse(*bpp); goto out; } #ifdef QUOTA if ((error = chkdq(ip, bb, cred, 0))) { - brelse(*bpp); + if (bpp) + brelse(*bpp); goto out; } #endif @@ -386,17 +411,14 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * * release both and start over after waiting. 
*/ - if ((*bpp)->b_flags & B_DELWRI) { + if (bpp && ((*bpp)->b_flags & B_DELWRI)) { if (!lfs_fits(fs, bb)) { - brelse(*bpp); + if (bpp) + brelse(*bpp); #ifdef QUOTA chkdq(ip, -bb, cred, 0); #endif -#ifdef LFS_FRAGSIZE_SEGLOCK - lfs_segunlock(fs); -#else lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); -#endif lfs_availwait(fs, bb); goto top; } @@ -407,24 +429,24 @@ lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, struct buf * ip->i_lfs_effnblks += bb; ip->i_flag |= IN_CHANGE | IN_UPDATE; - LFS_DEBUG_COUNTLOCKED("frag1"); + if (bpp) { + LFS_DEBUG_COUNTLOCKED("frag1"); - obufsize = (*bpp)->b_bufsize; - allocbuf(*bpp, nsize); + obufsize = (*bpp)->b_bufsize; + allocbuf(*bpp, nsize); - /* Adjust locked-list accounting */ - if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) - locked_queue_bytes += (*bpp)->b_bufsize - obufsize; + /* Adjust locked-list accounting */ + if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED) + locked_queue_bytes += (*bpp)->b_bufsize - obufsize; - LFS_DEBUG_COUNTLOCKED("frag2"); + LFS_DEBUG_COUNTLOCKED("frag2"); - bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize)); + bzero((char *)((*bpp)->b_data) + osize, (u_int)(nsize - osize)); + } out: -#ifdef LFS_FRAGSIZE_SEGLOCK - lfs_segunlock(fs); -#else - lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); -#endif + if (bpp) { + lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); + } return (error); } diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c index de3599f8f145..dd05a33d8f45 100644 --- a/sys/ufs/lfs/lfs_bio.c +++ b/sys/ufs/lfs/lfs_bio.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $ */ +/* $NetBSD: lfs_bio.c,v 1.58 2003/02/17 23:48:17 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.58 2003/02/17 23:48:17 perseant Exp $"); #include #include @@ -86,10 +86,11 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $"); #include #include -#include #include #include +#include + /* Macros to clear/set/test flags. */ # define SET(t, f) (t) |= (f) # define CLR(t, f) (t) &= ~(f) @@ -102,11 +103,14 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.57 2003/02/05 21:38:45 pk Exp $"); * No write cost accounting is done. * This is almost certainly wrong for synchronous operations and NFS. */ -int locked_queue_count = 0; /* XXX Count of locked-down buffers. */ -long locked_queue_bytes = 0L; /* XXX Total size of locked buffers. */ +int locked_queue_count = 0; /* Count of locked-down buffers. */ +long locked_queue_bytes = 0L; /* Total size of locked buffers. */ +int lfs_subsys_pages = 0L; /* Total number LFS-written pages */ int lfs_writing = 0; /* Set if already kicked off a writer because of buffer space */ +struct simplelock lfs_subsys_lock; /* Lock on subsys_pages */ extern int lfs_dostats; +extern int lfs_do_flush; /* * reserved number/bytes of locked buffers @@ -402,7 +406,7 @@ lfs_bwrite_ext(struct buf *bp, int flags) int fsb, s; KASSERT(bp->b_flags & B_BUSY); - KASSERT(flags & BW_CLEAN || !(bp->b_flags & B_CALL)); + KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp)); /* * Don't write *any* blocks if we're mounted read-only. 
@@ -411,7 +415,7 @@ lfs_bwrite_ext(struct buf *bp, int flags) if (VTOI(bp->b_vp)->i_lfs->lfs_ronly) { bp->b_flags &= ~(B_DELWRI | B_READ | B_ERROR); LFS_UNLOCK_BUF(bp); - if (bp->b_flags & B_CALL) + if (LFS_IS_MALLOC_BUF(bp)) bp->b_flags &= ~B_BUSY; else brelse(bp); @@ -465,28 +469,26 @@ lfs_bwrite_ext(struct buf *bp, int flags) void lfs_flush_fs(struct lfs *fs, int flags) { - if (fs->lfs_ronly == 0 && fs->lfs_dirops == 0) - { - /* disallow dirops during flush */ - fs->lfs_writer++; + if (fs->lfs_ronly) + return; - /* - * We set the queue to 0 here because we - * are about to write all the dirty - * buffers we have. If more come in - * while we're writing the segment, they - * may not get written, so we want the - * count to reflect these new writes - * after the segwrite completes. - */ - if (lfs_dostats) - ++lfs_stats.flush_invoked; - lfs_segwrite(fs->lfs_ivnode->v_mount, flags); + /* disallow dirops during flush */ + fs->lfs_writer++; - /* XXX KS - allow dirops again */ - if (--fs->lfs_writer == 0) - wakeup(&fs->lfs_dirops); + /* drain dirops */ + while (fs->lfs_dirops > 0) { + ++fs->lfs_diropwait; + tsleep(&fs->lfs_writer, PRIBIO+1, "fldirop", 0); + --fs->lfs_diropwait; } + + if (lfs_dostats) + ++lfs_stats.flush_invoked; + lfs_segwrite(fs->lfs_ivnode->v_mount, flags); + + /* allow dirops again */ + if (--fs->lfs_writer == 0) + wakeup(&fs->lfs_dirops); } /* @@ -512,6 +514,9 @@ lfs_flush(struct lfs *fs, int flags) } lfs_writing = 1; + lfs_subsys_pages = 0; /* XXXUBC need a better way to count this */ + wakeup(&lfs_subsys_pages); + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { @@ -525,7 +530,6 @@ lfs_flush(struct lfs *fs, int flags) vfs_unbusy(mp); } simple_unlock(&mountlist_slock); - LFS_DEBUG_COUNTLOCKED("flush"); lfs_writing = 0; @@ -562,25 +566,40 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) while (fs->lfs_dirops > 0 && (locked_queue_count + 
INOCOUNT(fs) > LFS_MAX_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || - lfs_dirvcount > LFS_MAXDIROP || fs->lfs_diropwait > 0)) + lfs_subsys_pages > LFS_MAX_PAGES || + lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0)) { ++fs->lfs_diropwait; tsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0); --fs->lfs_diropwait; } +#ifdef DEBUG_LFS_FLUSH + if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS) + printf("lqc = %d, max %d\n", locked_queue_count + INOCOUNT(fs), + LFS_MAX_BUFS); + if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES) + printf("lqb = %ld, max %d\n", locked_queue_bytes + INOBYTES(fs), + LFS_MAX_BYTES); + if (lfs_subsys_pages > LFS_MAX_PAGES) + printf("lssp = %d, max %d\n", lfs_subsys_pages, LFS_MAX_PAGES); + if (lfs_dirvcount > LFS_MAX_DIROP) + printf("ldvc = %d, max %d\n", lfs_dirvcount, LFS_MAX_DIROP); + if (fs->lfs_diropwait > 0) + printf("ldvw = %d\n", fs->lfs_diropwait); +#endif if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || - lfs_dirvcount > LFS_MAXDIROP || fs->lfs_diropwait > 0) + lfs_subsys_pages > LFS_MAX_PAGES || + lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) { - ++fs->lfs_writer; lfs_flush(fs, flags); - if (--fs->lfs_writer == 0) - wakeup(&fs->lfs_dirops); } - while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS - || locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES) + while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS || + locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES || + lfs_subsys_pages > LFS_WAIT_PAGES || + lfs_dirvcount > LFS_MAX_DIROP) { if (lfs_dostats) ++lfs_stats.wait_exceeded; @@ -601,10 +620,7 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES) { - ++fs->lfs_writer; lfs_flush(fs, flags | SEGM_CKP); - if (--fs->lfs_writer == 0) - wakeup(&fs->lfs_dirops); } } return (error); @@ -613,15 +629,8 @@ lfs_check(struct vnode 
*vp, daddr_t blkno, int flags) /* * Allocate a new buffer header. */ -#ifdef MALLOCLOG -# define DOMALLOC(S, T, F) _malloc((S), (T), (F), file, line) struct buf * -lfs_newbuf_malloclog(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, char *file, int line) -#else -# define DOMALLOC(S, T, F) malloc((S), (T), (F)) -struct buf * -lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size) -#endif +lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type) { struct buf *bp; size_t nbytes; @@ -629,11 +638,13 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size) nbytes = roundup(size, fsbtob(fs, 1)); - bp = DOMALLOC(sizeof(struct buf), M_SEGMENT, M_WAITOK); - bzero(bp, sizeof(struct buf)); + s = splbio(); + bp = pool_get(&bufpool, PR_WAITOK); + splx(s); + memset(bp, 0, sizeof(struct buf)); if (nbytes) { - bp->b_data = DOMALLOC(nbytes, M_SEGMENT, M_WAITOK); - bzero(bp->b_data, nbytes); + bp->b_data = lfs_malloc(fs, nbytes, type); + /* memset(bp->b_data, 0, nbytes); */ } #ifdef DIAGNOSTIC if (vp == NULL) @@ -659,27 +670,20 @@ lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size) return (bp); } -#ifdef MALLOCLOG -# define DOFREE(A, T) _free((A), (T), file, line) void -lfs_freebuf_malloclog(struct buf *bp, char *file, int line) -#else -# define DOFREE(A, T) free((A), (T)) -void -lfs_freebuf(struct buf *bp) -#endif +lfs_freebuf(struct lfs *fs, struct buf *bp) { int s; s = splbio(); if (bp->b_vp) brelvp(bp); - splx(s); if (!(bp->b_flags & B_INVAL)) { /* B_INVAL indicates a "fake" buffer */ - DOFREE(bp->b_data, M_SEGMENT); + lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN); bp->b_data = NULL; } - DOFREE(bp, M_SEGMENT); + pool_put(&bufpool, bp); + splx(s); } /* @@ -707,7 +711,7 @@ lfs_countlocked(int *count, long *bytes, char *msg) for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; bp = bp->b_freelist.tqe_next) { - if (bp->b_flags & B_CALL) /* Malloced buffer */ + if (bp->b_flags & B_CALL) continue; 
n++; size += bp->b_bufsize; diff --git a/sys/ufs/lfs/lfs_cksum.c b/sys/ufs/lfs/lfs_cksum.c index 35ba63d8fce7..a8f652ea30a8 100644 --- a/sys/ufs/lfs/lfs_cksum.c +++ b/sys/ufs/lfs/lfs_cksum.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_cksum.c,v 1.20 2002/06/16 00:13:15 perseant Exp $ */ +/* $NetBSD: lfs_cksum.c,v 1.21 2003/02/17 23:48:18 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_cksum.c,v 1.20 2002/06/16 00:13:15 perseant Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_cksum.c,v 1.21 2003/02/17 23:48:18 perseant Exp $"); #include #ifdef _KERNEL diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c index 978ab4fc2393..069dc804fa61 100644 --- a/sys/ufs/lfs/lfs_debug.c +++ b/sys/ufs/lfs/lfs_debug.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_debug.c,v 1.19 2003/01/29 13:14:34 yamt Exp $ */ +/* $NetBSD: lfs_debug.c,v 1.20 2003/02/17 23:48:18 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -73,7 +73,7 @@ #ifdef DEBUG #include -__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.19 2003/01/29 13:14:34 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_debug.c,v 1.20 2003/02/17 23:48:18 perseant Exp $"); #include #include #include @@ -167,7 +167,7 @@ lfs_dump_super(struct lfs *lfsp) printf("Checkpoint Info\n"); printf("%s%d\t%s%x\t%s%d\n", - "free ", lfsp->lfs_free, + "freehd ", lfsp->lfs_freehd, "idaddr ", lfsp->lfs_idaddr, "ifile ", lfsp->lfs_ifile); printf("%s%x\t%s%d\t%s%x\t%s%x\t%s%x\t%s%x\n", diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h index 66e42e3afda5..31696f9a10cc 100644 --- a/sys/ufs/lfs/lfs_extern.h +++ b/sys/ufs/lfs/lfs_extern.h @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_extern.h,v 1.38 2003/02/01 18:34:14 tron Exp $ */ +/* $NetBSD: lfs_extern.h,v 1.39 2003/02/17 23:48:18 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -88,7 +88,7 @@ MALLOC_DECLARE(M_SEGMENT); #define LFS_WRITEINDIR 1 /* flush indirect blocks on non-checkpoint writes */ #define LFS_CLEAN_VNHEAD 2 /* put prev unrefed cleaned vnodes on head of free list */ #define LFS_DOSTATS 3 -#define LFS_STATS 4 +#define LFS_MAXPAGES 4 #define LFS_MAXID 5 #define LFS_NAMES { \ @@ -96,7 +96,7 @@ MALLOC_DECLARE(M_SEGMENT); { "flushindir", CTLTYPE_INT }, \ { "clean_vnhead", CTLTYPE_INT }, \ { "dostats", CTLTYPE_INT }, \ - { "stats", CTLTYPE_STRUCT }, \ + { "maxpages", CTLTYPE_INT }, \ } struct fid; @@ -117,7 +117,8 @@ struct segment; struct ucred; extern int lfs_allclean_wakeup; -extern struct pool lfs_inode_pool; /* memory pool for inodes */ +extern struct pool lfs_inode_pool; /* memory pool for inodes */ +extern struct pool lfs_inoext_pool; /* memory pool for inode extension */ __BEGIN_DECLS /* lfs_alloc.c */ @@ -130,16 +131,8 @@ int lfs_fits(struct lfs *, int); void lfs_flush_fs(struct lfs *, int); void lfs_flush(struct lfs *, int); int lfs_check(struct vnode *, daddr_t, int); -#ifdef MALLOCLOG -void lfs_freebuf_malloclog(struct buf *, char *, int); -struct buf *lfs_newbuf_malloclog(struct lfs *, struct vnode *, - daddr_t, size_t, char *, int); -#define lfs_freebuf(BP) lfs_freebuf_malloclog((BP), __FILE__, __LINE__) -#define lfs_newbuf(F, V, A, S) lfs_newbuf_malloclog((F),(V),(A),(S),__FILE__,__LINE__) -#else -void lfs_freebuf(struct buf *); -struct buf *lfs_newbuf(struct lfs *, struct vnode *, daddr_t, size_t); -#endif +void lfs_freebuf(struct lfs *, struct buf *); +struct buf *lfs_newbuf(struct lfs *, struct vnode *, daddr_t, size_t, int); void lfs_countlocked(int *, long *, char *); int lfs_reserve(struct lfs *, struct vnode *, struct vnode *, int); @@ -169,6 +162,7 @@ void lfs_writefile(struct lfs *, struct segment *, struct vnode *); int lfs_writeinode(struct lfs *, struct segment *, struct inode *); int lfs_gatherblock(struct segment *, struct 
buf *, int *); int lfs_gather(struct lfs *, struct segment *, struct vnode *, int (*match )(struct lfs *, struct buf *)); +void lfs_update_single(struct lfs *, struct segment *, daddr_t, int32_t, int, int); void lfs_updatemeta(struct segment *); int lfs_initseg(struct lfs *); void lfs_newseg(struct lfs *); @@ -187,12 +181,17 @@ void lfs_vunref(struct vnode *); void lfs_vunref_head(struct vnode *); /* lfs_subr.c */ -void lfs_seglock(struct lfs *, unsigned long); +void lfs_setup_resblks(struct lfs *); +void lfs_free_resblks(struct lfs *); +void *lfs_malloc(struct lfs *, size_t, int); +void lfs_free(struct lfs *, void *, int); +int lfs_seglock(struct lfs *, unsigned long); void lfs_segunlock(struct lfs *); /* lfs_syscalls.c */ int lfs_fastvget(struct mount *, ino_t, daddr_t, struct vnode **, struct dinode *); struct buf *lfs_fakebuf(struct lfs *, struct vnode *, int, size_t, caddr_t); +int lfs_do_segclean(struct lfs *, unsigned long); /* lfs_vfsops.c */ void lfs_init(void); @@ -200,7 +199,6 @@ void lfs_reinit(void); void lfs_done(void); int lfs_mountroot(void); int lfs_mount(struct mount *, const char *, void *, struct nameidata *, struct proc *); -int lfs_mountfs(struct vnode *, struct mount *, struct proc *); int lfs_unmount(struct mount *, int, struct proc *); int lfs_statfs(struct mount *, struct statfs *, struct proc *); int lfs_sync(struct mount *, int, struct ucred *, struct proc *); @@ -213,6 +211,10 @@ int lfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *); void lfs_unmark_vnode(struct vnode *); void lfs_itimes(struct inode *, struct timespec *, struct timespec *, struct timespec *); +int lfs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *); +void lfs_gop_size(struct vnode *, off_t, off_t *, int); +int lfs_putpages_ext(void *, int); +int lfs_gatherpages(struct vnode *); int lfs_balloc (void *); int lfs_valloc (void *); @@ -230,6 +232,7 @@ int lfs_read (void *); int lfs_remove (void *); int lfs_rmdir (void *); int lfs_link 
(void *); +int lfs_mmap (void *); int lfs_rename (void *); int lfs_getattr (void *); int lfs_setattr (void *); diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c index 315eaca097bf..e0f8618aa318 100644 --- a/sys/ufs/lfs/lfs_inode.c +++ b/sys/ufs/lfs/lfs_inode.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_inode.c,v 1.63 2003/01/25 16:40:29 fvdl Exp $ */ +/* $NetBSD: lfs_inode.c,v 1.64 2003/02/17 23:48:18 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.63 2003/01/25 16:40:29 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.64 2003/02/17 23:48:18 perseant Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -231,6 +231,9 @@ lfs_truncate(void *v) struct proc *a_p; } */ *ap = v; struct vnode *ovp = ap->a_vp; +#ifdef LFS_UBC + struct genfs_node *gp = VTOG(ovp); +#endif daddr_t lastblock; struct inode *oip; daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; @@ -247,6 +250,7 @@ lfs_truncate(void *v) long lastseg; size_t bc; int obufsize, odb; + int usepc, needunlock; if (length < 0) return (EINVAL); @@ -282,6 +286,10 @@ lfs_truncate(void *v) fs = oip->i_lfs; lfs_imtime(fs); osize = oip->i_ffs_size; + needunlock = usepc = 0; +#ifdef LFS_UBC + usepc = (ovp->v_type == VREG && osize > length && ovp != fs->lfs_ivnode); +#endif /* * Lengthen the size of the file. We must ensure that the @@ -313,18 +321,7 @@ lfs_truncate(void *v) if ((error = lfs_reserve(fs, ovp, NULL, btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift))) != 0) return (error); - /* - * Make sure no writes to this inode can happen while we're - * truncating. Otherwise, blocks which are accounted for on the - * inode *and* which have been created for cleaning can coexist, - * and cause an overcounting. 
- */ -#ifdef LFS_FRAGSIZE_SEGLOCK - lfs_seglock(fs, SEGM_PROT); -#else - lockmgr(&fs->lfs_fraglock, LK_SHARED, 0); -#endif - + /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the @@ -338,7 +335,12 @@ lfs_truncate(void *v) bc = 0; if (offset == 0) { oip->i_ffs_size = length; - } else { + } else +#ifdef LFS_UBC + if (!usepc) +#endif + { + lockmgr(&fs->lfs_fraglock, LK_SHARED, 0); lbn = lblkno(fs, length); aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) @@ -347,11 +349,7 @@ lfs_truncate(void *v) if (error) { lfs_reserve(fs, ovp, NULL, -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift)); -#ifdef LFS_FRAGSIZE_SEGLOCK - lfs_segunlock(fs); -#else lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); -#endif return (error); } obufsize = bp->b_bufsize; @@ -367,7 +365,45 @@ lfs_truncate(void *v) if (bp->b_flags & B_DELWRI) fs->lfs_avail += odb - btofsb(fs, size); (void) VOP_BWRITE(bp); + lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); } +#ifdef LFS_UBC + /* + * When truncating a regular file down to a non-block-aligned size, + * we must zero the part of last block which is past the new EOF. + * We must synchronously flush the zeroed pages to disk + * since the new pages will be invalidated as soon as we + * inform the VM system of the new, smaller size. + * We must do this before acquiring the GLOCK, since fetching + * the pages will acquire the GLOCK internally. + * So there is a window where another thread could see a whole + * zeroed page past EOF, but that's life. + */ + + else { /* vp->v_type == VREG && length < osize && offset != 0 */ + voff_t eoz; + + aflags = ap->a_flags & IO_SYNC ? 
B_SYNC : 0; + error = ufs_balloc_range(ovp, length - 1, 1, ap->a_cred, + aflags); + if (error) { + return error; + } + size = blksize(fs, oip, lblkno(fs, length)); + eoz = MIN(lblktosize(fs, lblkno(fs, length)) + size, osize); + uvm_vnp_zerorange(ovp, length, eoz - length); + simple_lock(&ovp->v_interlock); + error = VOP_PUTPAGES(ovp, trunc_page(length), round_page(eoz), + PGO_CLEANIT | PGO_DEACTIVATE | PGO_SYNCIO); + if (error) { + return error; + } + } + + lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL); +#endif + + oip->i_ffs_size = length; uvm_vnp_setsize(ovp, length); /* * Calculate index into inode's block list of @@ -428,6 +464,10 @@ lfs_truncate(void *v) goto done; } + if (!usepc) { + lockmgr(&fs->lfs_fraglock, LK_SHARED, 0); + needunlock = 1; + } /* * All whole direct blocks or frags. */ @@ -516,10 +556,10 @@ done: #endif lfs_reserve(fs, ovp, NULL, -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift)); -#ifdef LFS_FRAGSIZE_SEGLOCK - lfs_segunlock(fs); -#else - lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); + if (needunlock) + lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); +#ifdef LFS_UBC + lockmgr(&gp->g_glock, LK_RELEASE, NULL); #endif return (allerror); } @@ -550,7 +590,6 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num) { SEGUSE *sup; struct buf *bp; - int error; if (lastseg < 0 || num == 0) return 0; @@ -563,8 +602,9 @@ lfs_update_seguse(struct lfs *fs, long lastseg, size_t num) sup->su_nbytes = num; } sup->su_nbytes -= num; - error = LFS_BWRITE_LOG(bp); /* Ifile */ - return error; + LFS_WRITESEGENTRY(sup, fs, lastseg, bp); + + return 0; } /* @@ -707,6 +747,8 @@ lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, /* * Destroy any in core blocks past the truncation length. * Inlined from vtruncbuf, so that lfs_avail could be updated. + * We take the fraglock to prevent cleaning from occurring while we are + * invalidating blocks. 
*/ static int lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) @@ -714,10 +756,19 @@ lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) struct buf *bp, *nbp; int s, error; struct lfs *fs; + voff_t off; + + off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); + simple_lock(&vp->v_interlock); + error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); + if (error) { + return error; + } fs = VTOI(vp)->i_lfs; s = splbio(); + lockmgr(&fs->lfs_fraglock, LK_SHARED, 0); restart: for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); @@ -729,6 +780,7 @@ restart: "lfs_vtruncbuf", slptimeo); if (error) { splx(s); + lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); return (error); } goto restart; @@ -753,6 +805,7 @@ restart: "lfs_vtruncbuf", slptimeo); if (error) { splx(s); + lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); return (error); } goto restart; @@ -768,6 +821,7 @@ restart: } splx(s); + lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0); return (0); } diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c index 6290d7484ba4..b797f9ba5eb6 100644 --- a/sys/ufs/lfs/lfs_segment.c +++ b/sys/ufs/lfs/lfs_segment.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_segment.c,v 1.100 2003/02/05 21:38:45 pk Exp $ */ +/* $NetBSD: lfs_segment.c,v 1.101 2003/02/17 23:48:19 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.100 2003/02/05 21:38:45 pk Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.101 2003/02/17 23:48:19 perseant Exp $"); #define ivndebug(vp,str) printf("ino %d: %s\n",VTOI(vp)->i_number,(str)) @@ -89,7 +89,6 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_segment.c,v 1.100 2003/02/05 21:38:45 pk Exp $") #include #include #include -#include #include #include @@ -110,6 +109,7 @@ MALLOC_DEFINE(M_SEGMENT, "LFS segment", "Segment for LFS"); extern int count_lock_queue(void); extern struct simplelock vnode_free_list_slock; /* XXX */ +extern int lfs_subsys_pages; static void lfs_generic_callback(struct buf *, void (*)(struct buf *)); static void lfs_super_aiodone(struct buf *); @@ -206,6 +206,10 @@ lfs_vflush(struct vnode *vp) struct segment *sp; struct buf *bp, *nbp, *tbp, *tnbp; int error, s; + int flushed; +#if 0 + int redo; +#endif ip = VTOI(vp); fs = VFSTOUFS(vp->v_mount)->um_lfs; @@ -219,28 +223,57 @@ lfs_vflush(struct vnode *vp) /* * Toss any cleaning buffers that have real counterparts - * to avoid losing new data + * to avoid losing new data. */ s = splbio(); for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = LIST_NEXT(bp, b_vnbufs); - if (bp->b_flags & B_CALL) { - for (tbp = LIST_FIRST(&vp->v_dirtyblkhd); tbp; - tbp = tnbp) - { - tnbp = LIST_NEXT(tbp, b_vnbufs); - if (tbp->b_vp == bp->b_vp - && tbp->b_lblkno == bp->b_lblkno - && tbp != bp) - { - fs->lfs_avail += btofsb(fs, bp->b_bcount); + if (!LFS_IS_MALLOC_BUF(bp)) + continue; +#ifdef LFS_UBC + /* + * In the UBC case, look for *pages* matching + * the range covered by cleaning blocks. 
+ */ + if (bp->b_lblkno > 0 && vp->v_type == VREG && + vp != fs->lfs_ivnode) { + struct vm_page *pg; + voff_t off; + + for (off = lblktosize(fs, bp->b_lblkno); + off < lblktosize(fs, bp->b_lblkno + 1); + off += PAGE_SIZE) { + pg = uvm_pagelookup(&vp->v_uobj, off); + if (pg && pmap_is_modified(pg)) { + fs->lfs_avail += btofsb(fs, + bp->b_bcount); wakeup(&fs->lfs_avail); - lfs_freebuf(bp); + lfs_freebuf(fs, bp); bp = NULL; - break; + goto nextbp; } } } +#endif + for (tbp = LIST_FIRST(&vp->v_dirtyblkhd); tbp; + tbp = tnbp) + { + tnbp = LIST_NEXT(tbp, b_vnbufs); + if (tbp->b_vp == bp->b_vp + && tbp->b_lblkno == bp->b_lblkno + && tbp != bp) + { + fs->lfs_avail += btofsb(fs, + bp->b_bcount); + wakeup(&fs->lfs_avail); + lfs_freebuf(fs, bp); + bp = NULL; + break; + } + } +#ifdef LFS_UBC + nextbp: +#endif } splx(s); } @@ -272,9 +305,7 @@ lfs_vflush(struct vnode *vp) } /* Copied from lfs_writeseg */ if (bp->b_flags & B_CALL) { - /* if B_CALL, it was created with newbuf */ - lfs_freebuf(bp); - bp = NULL; + biodone(bp); } else { bremfree(bp); LFS_UNLOCK_BUF(bp); @@ -305,16 +336,19 @@ lfs_vflush(struct vnode *vp) } sp = fs->lfs_sp; - if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { + flushed = 0; + if (VPISEMPTY(vp)) { lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY); + ++flushed; } else if ((ip->i_flag & IN_CLEANING) && (fs->lfs_sp->seg_flags & SEGM_CLEAN)) { #ifdef DEBUG_LFS ivndebug(vp,"vflush/clean"); #endif lfs_writevnodes(fs, vp->v_mount, sp, VN_CLEAN); + ++flushed; } else if (lfs_dostats) { - if (LIST_FIRST(&vp->v_dirtyblkhd) || (VTOI(vp)->i_flag & IN_ALLMOD)) + if (!VPISEMPTY(vp) || (VTOI(vp)->i_flag & IN_ALLMOD)) ++lfs_stats.vflush_invoked; #ifdef DEBUG_LFS ivndebug(vp,"vflush"); @@ -333,13 +367,24 @@ lfs_vflush(struct vnode *vp) } #endif +#if 1 /* XXX */ do { do { if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) lfs_writefile(fs, sp, vp); } while (lfs_writeinode(fs, sp, ip)); } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); - +#else + if (flushed && vp != 
fs->lfs_ivnode) + lfs_writeseg(fs, sp); + else do { + fs->lfs_flags &= ~LFS_IFDIRTY; + lfs_writefile(fs, sp, vp); + redo = lfs_writeinode(fs, sp, ip); + redo += lfs_writeseg(fs, sp); + redo += (fs->lfs_flags & LFS_IFDIRTY); + } while (redo && vp == fs->lfs_ivnode); +#endif if (lfs_dostats) { ++lfs_stats.nwrites; if (sp->seg_flags & SEGM_SYNC) @@ -418,7 +463,7 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) continue; } - if (op == VN_EMPTY && LIST_FIRST(&vp->v_dirtyblkhd)) { + if (op == VN_EMPTY && !VPISEMPTY(vp)) { vndebug(vp,"empty"); continue; } @@ -439,17 +484,12 @@ lfs_writevnodes(struct lfs *fs, struct mount *mp, struct segment *sp, int op) /* * Write the inode/file if dirty and it's not the IFILE. */ - if ((ip->i_flag & IN_ALLMOD) || - (LIST_FIRST(&vp->v_dirtyblkhd) != NULL)) - { + if ((ip->i_flag & IN_ALLMOD) || !VPISEMPTY(vp)) { only_cleaning = ((ip->i_flag & IN_ALLMOD) == IN_CLEANING); - if (ip->i_number != LFS_IFILE_INUM - && LIST_FIRST(&vp->v_dirtyblkhd) != NULL) - { + if (ip->i_number != LFS_IFILE_INUM) lfs_writefile(fs, sp, vp); - } - if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) { + if (!VPISEMPTY(vp)) { if (WRITEINPROG(vp)) { #ifdef DEBUG_LFS ivndebug(vp,"writevnodes/write2"); @@ -490,6 +530,7 @@ lfs_segwrite(struct mount *mp, int flags) int writer_set = 0; int dirty; int redo; + int loopcount; fs = VFSTOUFS(mp)->um_lfs; @@ -550,11 +591,12 @@ lfs_segwrite(struct mount *mp, int flags) if ((error = tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { + printf("segwrite mysterious error\n"); /* XXX why not segunlock? 
*/ - free(sp->bpp, M_SEGMENT); + pool_put(&fs->lfs_bpppool, sp->bpp); sp->bpp = NULL; - free(sp, M_SEGMENT); - fs->lfs_sp = NULL; + pool_put(&fs->lfs_segpool, sp); + sp = fs->lfs_sp = NULL; return (error); } fs->lfs_writer++; @@ -613,31 +655,28 @@ lfs_segwrite(struct mount *mp, int flags) did_ckp = 0; if (do_ckp || fs->lfs_doifile) { + loopcount = 10; do { vp = fs->lfs_ivnode; - vget(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY); #ifdef DEBUG LFS_ENTER_LOG("pretend", __FILE__, __LINE__, 0, 0); #endif fs->lfs_flags &= ~LFS_IFDIRTY; ip = VTOI(vp); + if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) lfs_writefile(fs, sp, vp); + if (ip->i_flag & IN_ALLMOD) ++did_ckp; redo = lfs_writeinode(fs, sp, ip); - - vput(vp); - /* - * if we know we'll redo, no need to writeseg here. - */ - if (!(redo && do_ckp)) { - redo += lfs_writeseg(fs, sp); - } + redo += lfs_writeseg(fs, sp); redo += (fs->lfs_flags & LFS_IFDIRTY); - } while (redo && do_ckp); + } while (redo && do_ckp && --loopcount > 0); + if (loopcount <= 0) + printf("lfs_segwrite: possibly invalid checkpoint!\n"); /* The ifile should now be all clear */ if (do_ckp && LIST_FIRST(&vp->v_dirtyblkhd)) { @@ -670,7 +709,10 @@ lfs_segwrite(struct mount *mp, int flags) * At the moment, the user's process hangs around so we can * sleep. */ - fs->lfs_doifile = 0; + if (loopcount <= 0) + fs->lfs_doifile = 1; + else + fs->lfs_doifile = 0; if (writer_set && --fs->lfs_writer == 0) wakeup(&fs->lfs_dirops); @@ -738,10 +780,29 @@ lfs_writefile(struct lfs *fs, struct segment *sp, struct vnode *vp) * The same is true of the Ifile since checkpoints assume * that all valid Ifile blocks are written. */ - if (IS_FLUSHING(fs,vp) || vp == fs->lfs_ivnode) + if (IS_FLUSHING(fs,vp) || vp == fs->lfs_ivnode) { lfs_gather(fs, sp, vp, lfs_match_data); - } else + /* + * Don't call VOP_PUTPAGES: if we're flushing, + * we've already done it, and the Ifile doesn't + * use the page cache. 
+ */ + } + } else { lfs_gather(fs, sp, vp, lfs_match_data); +#ifdef LFS_UBC + /* + * If we're flushing, we've already called VOP_PUTPAGES + * so don't do it again. Otherwise, we want to write + * everything we've got. + */ + if (!IS_FLUSHING(fs, vp)) { + VOP_PUTPAGES(vp, 0, 0, + PGO_CLEANIT | PGO_ALLPAGES | PGO_LOCKED | + PGO_BUSYFAIL); + } +#endif + } /* * It may not be necessary to write the meta-data blocks at this point, @@ -865,6 +926,10 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) /* * If we are cleaning, ensure that we don't write UNWRITTEN disk * addresses to disk; possibly revert the inode size. + * XXX By not writing these blocks, we are making the lfs_avail + * XXX count on disk wrong by the same amount. We should be + * XXX able to "borrow" from lfs_avail and return it after the + * XXX Ifile is written. See also in lfs_writeseg. */ if (ip->i_lfs_effnblks != ip->i_ffs_blocks) { cdp->di_size = ip->i_lfs_osize; @@ -992,7 +1057,7 @@ lfs_writeinode(struct lfs *fs, struct segment *sp, struct inode *ip) (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); if (redo_ifile) fs->lfs_flags |= LFS_IFDIRTY; - error = LFS_BWRITE_LOG(bp); /* Ifile */ + LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */ } return (redo_ifile); } @@ -1002,7 +1067,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) { struct lfs *fs; int version; - + int j, blksinblk; + /* * If full, finish this segment. We may be doing I/O, so * release and reacquire the splbio(). 
@@ -1012,7 +1078,8 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) panic ("lfs_gatherblock: Null vp in segment"); #endif fs = sp->fs; - if (sp->sum_bytes_left < sizeof(int32_t) || + blksinblk = howmany(bp->b_bcount, fs->lfs_bsize); + if (sp->sum_bytes_left < sizeof(int32_t) * blksinblk || sp->seg_bytes_left < bp->b_bcount) { if (sptr) splx(*sptr); @@ -1045,7 +1112,9 @@ lfs_gatherblock(struct segment *sp, struct buf *bp, int *sptr) bp->b_flags &= ~B_DONE; *sp->cbpp++ = bp; - sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno; + for (j = 0; j < blksinblk; j++) + sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno + + (j << fs->lfs_fbshift); sp->sum_bytes_left -= sizeof(int32_t); sp->seg_bytes_left -= bp->b_bcount; @@ -1128,6 +1197,135 @@ loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { return count; } +#if DEBUG +# define DEBUG_OOFF(n) do { \ + if (ooff == 0) { \ + printf("lfs_updatemeta[%d]: warning: writing " \ + "ino %d lbn %" PRId64 " at 0x%" PRIx64 \ + ", was 0x0\n", (n), ip->i_number, lbn, daddr); \ + } \ +} while(0) +#else +# define DEBUG_OOFF(n) +#endif + +/* + * Change the given block's address to ndaddr, finding its previous + * location using ufs_bmaparray(). + * + * Account for this change in the segment table. 
+ */ +void +lfs_update_single(struct lfs *fs, struct segment *sp, daddr_t lbn, + int32_t ndaddr, int size, int num) +{ + SEGUSE *sup; + struct buf *bp; + struct indir a[NIADDR + 2], *ap; + struct inode *ip; + struct vnode *vp; + daddr_t daddr, ooff; + int error; + int bb, osize, obb; + + vp = sp->vp; + ip = VTOI(vp); + + error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL); + if (error) + panic("lfs_updatemeta: ufs_bmaparray returned %d", error); + if (daddr > 0) + daddr = dbtofsb(fs, daddr); + + bb = fragstofsb(fs, numfrags(fs, size)); + switch (num) { + case 0: + ooff = ip->i_ffs_db[lbn]; + DEBUG_OOFF(0); + if (ooff == UNWRITTEN) + ip->i_ffs_blocks += bb; + else { + /* possible fragment truncation or extension */ + obb = btofsb(fs, ip->i_lfs_fragsize[lbn]); + ip->i_ffs_blocks += (bb - obb); + } + ip->i_ffs_db[lbn] = ndaddr; + break; + case 1: + ooff = ip->i_ffs_ib[a[0].in_off]; + DEBUG_OOFF(1); + if (ooff == UNWRITTEN) + ip->i_ffs_blocks += bb; + ip->i_ffs_ib[a[0].in_off] = ndaddr; + break; + default: + ap = &a[num - 1]; + if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) + panic("lfs_updatemeta: bread bno %" PRId64, + ap->in_lbn); + + /* XXX ondisk32 */ + ooff = ((int32_t *)bp->b_data)[ap->in_off]; + DEBUG_OOFF(num); + if (ooff == UNWRITTEN) + ip->i_ffs_blocks += bb; + /* XXX ondisk32 */ + ((int32_t *)bp->b_data)[ap->in_off] = ndaddr; + (void) VOP_BWRITE(bp); + } + KASSERT(daddr < fs->lfs_lastpseg || daddr > ndaddr); + + /* + * Update segment usage information, based on old size + * and location. + */ + if (daddr > 0) { + u_int32_t oldsn = dtosn(fs, daddr); +#ifdef DIAGNOSTIC + int ndupino = (sp->seg_number == oldsn) ? 
+ sp->ndupino : 0; +#endif + if (lbn >= 0 && lbn < NDADDR) + osize = ip->i_lfs_fragsize[lbn]; + else + osize = fs->lfs_bsize; + LFS_SEGENTRY(sup, fs, oldsn, bp); +#ifdef DIAGNOSTIC + if (sup->su_nbytes + DINODE_SIZE * ndupino < osize) { + printf("lfs_updatemeta: negative bytes " + "(segment %" PRIu32 " short by %" PRId64 + ")\n", dtosn(fs, daddr), + (int64_t)osize - + (DINODE_SIZE * sp->ndupino + + sup->su_nbytes)); + printf("lfs_updatemeta: ino %d, lbn %" PRId64 + ", addr = 0x%" PRIx64 "\n", + VTOI(sp->vp)->i_number, lbn, daddr); + printf("lfs_updatemeta: ndupino=%d\n", ndupino); + panic("lfs_updatemeta: negative bytes"); + sup->su_nbytes = osize - DINODE_SIZE * sp->ndupino; + } +#endif +#ifdef DEBUG_SU_NBYTES + printf("seg %" PRIu32 " -= %d for ino %d lbn %" PRId64 + " db 0x%" PRIx64 "\n", + dtosn(fs, daddr), osize, + VTOI(sp->vp)->i_number, lbn, daddr); +#endif + sup->su_nbytes -= osize; + if (!(bp->b_flags & B_GATHERED)) + fs->lfs_flags |= LFS_IFDIRTY; + LFS_WRITESEGENTRY(sup, fs, oldsn, bp); + } + /* + * Now that this block has a new address, and its old + * segment no longer owns it, we can forget about its + * old size. + */ + if (lbn >= 0 && lbn < NDADDR) + ip->i_lfs_fragsize[lbn] = size; +} + /* * Update the metadata that points to the blocks listed in the FINFO * array. @@ -1135,32 +1333,28 @@ loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { void lfs_updatemeta(struct segment *sp) { - SEGUSE *sup; - struct buf *bp, *sbp; + struct buf *sbp; struct lfs *fs; struct vnode *vp; - struct indir a[NIADDR + 2], *ap; - struct inode *ip; - daddr_t daddr, lbn, off; - daddr_t ooff; - int error, i, nblocks, num; - int bb, osize, obb; + daddr_t lbn; + int i, nblocks, num; + int bb; + int bytesleft, size; vp = sp->vp; nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; - if (nblocks < 0) - panic("This is a bad thing"); - if (vp == NULL || nblocks == 0) + KASSERT(nblocks >= 0); + if (vp == NULL || nblocks == 0) return; - /* Sort the blocks. 
*/ /* - * XXX KS - We have to sort even if the blocks come from the + * Sort the blocks. + * + * We have to sort even if the blocks come from the * cleaner, because there might be other pending blocks on the * same inode...and if we don't sort, and there are fragments * present, blocks may be written in the wrong place. */ - /* if (!(sp->seg_flags & SEGM_CLEAN)) */ lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks); /* @@ -1174,24 +1368,18 @@ lfs_updatemeta(struct segment *sp) * XXX true until lfs_markv is fixed to do everything with * XXX fake blocks (including fake inodes and fake indirect blocks). */ - sp->fip->fi_lastlength = sp->start_bpp[nblocks - 1]->b_bcount; + fs = sp->fs; + sp->fip->fi_lastlength = ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & + fs->lfs_bmask) + 1; /* * Assign disk addresses, and update references to the logical * block and the segment usage information. */ - fs = sp->fs; for (i = nblocks; i--; ++sp->start_bpp) { - lbn = *sp->start_lbp++; sbp = *sp->start_bpp; - + lbn = *sp->start_lbp++; sbp->b_blkno = fsbtodb(fs, fs->lfs_offset); - off = fs->lfs_offset; - if (sbp->b_blkno == sbp->b_lblkno) { - printf("lfs_updatemeta: ino %d blk %" PRId64 - " has same lbn and daddr\n", - VTOI(vp)->i_number, off); - } /* * If we write a frag in the wrong place, the cleaner won't @@ -1200,124 +1388,24 @@ lfs_updatemeta(struct segment *sp) * that the indirect block that actually ends the list * is of a smaller size!) 
*/ - if (sbp->b_bcount < fs->lfs_bsize && i != 0) + if ((sbp->b_bcount & fs->lfs_bmask) && i != 0) panic("lfs_updatemeta: fragment is not last block"); - - bb = fragstofsb(fs, numfrags(fs, sbp->b_bcount)); - fs->lfs_offset += bb; - error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL); - if (daddr > 0) - daddr = dbtofsb(fs, daddr); - if (error) - panic("lfs_updatemeta: ufs_bmaparray %d", error); - ip = VTOI(vp); - switch (num) { - case 0: - ooff = ip->i_ffs_db[lbn]; -#ifdef DEBUG - if (ooff == 0) { - printf("lfs_updatemeta[1]: warning: writing " - "ino %d lbn %" PRId64 " at 0x%" PRIx64 - ", was 0x0\n", ip->i_number, lbn, off); - } -#endif - if (ooff == UNWRITTEN) - ip->i_ffs_blocks += bb; - else { - /* possible fragment truncation or extension */ - obb = btofsb(fs, ip->i_lfs_fragsize[lbn]); - ip->i_ffs_blocks += (bb - obb); - } - ip->i_ffs_db[lbn] = off; - break; - case 1: - ooff = ip->i_ffs_ib[a[0].in_off]; -#ifdef DEBUG - if (ooff == 0) { - printf("lfs_updatemeta[2]: warning: writing " - "ino %d lbn %" PRId64 " at 0x%" PRIx64 - ", was 0x0\n", ip->i_number, lbn, off); - } -#endif - if (ooff == UNWRITTEN) - ip->i_ffs_blocks += bb; - ip->i_ffs_ib[a[0].in_off] = off; - break; - default: - ap = &a[num - 1]; - if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) - panic("lfs_updatemeta: bread bno %" PRId64, - ap->in_lbn); - - /* XXX ondisk32 */ - ooff = ((int32_t *)bp->b_data)[ap->in_off]; -#if DEBUG - if (ooff == 0) { - printf("lfs_updatemeta[3]: warning: writing " - "ino %d lbn %" PRId64 " at 0x%" PRIx64 - ", was 0x0\n", ip->i_number, lbn, off); - } -#endif - if (ooff == UNWRITTEN) - ip->i_ffs_blocks += bb; - /* XXX ondisk32 */ - ((int32_t *)bp->b_data)[ap->in_off] = off; - (void) VOP_BWRITE(bp); - } -#ifdef DEBUG - if (daddr >= fs->lfs_lastpseg && daddr <= off) { - printf("lfs_updatemeta: ino %d, lbn %" PRId64 ", " - "addr = %" PRIx64 " in same pseg\n", - VTOI(sp->vp)->i_number, sbp->b_lblkno, daddr); - } -#endif + /* - * Update segment usage information, based on 
old size - * and location. + * For each subblock in this possibly oversized block, + * update its address on disk. */ - if (daddr > 0) { - u_int32_t oldsn = dtosn(fs, daddr); -#ifdef DIAGNOSTIC - int ndupino = (sp->seg_number == oldsn) ? - sp->ndupino : 0; -#endif - if (lbn >= 0 && lbn < NDADDR) - osize = ip->i_lfs_fragsize[lbn]; - else - osize = fs->lfs_bsize; - LFS_SEGENTRY(sup, fs, oldsn, bp); -#ifdef DIAGNOSTIC - if (sup->su_nbytes + DINODE_SIZE * ndupino < osize) { - printf("lfs_updatemeta: negative bytes " - "(segment %" PRIu32 " short by %d)\n", - dtosn(fs, daddr), - osize - sup->su_nbytes); - printf("lfs_updatemeta: ino %d, lbn %" PRId64 - ", addr = 0x%" PRIx64 "\n", - VTOI(sp->vp)->i_number, lbn, daddr); - printf("lfs_updatemeta: ndupino=%d\n", ndupino); - panic("lfs_updatemeta: negative bytes"); - sup->su_nbytes = osize; - } -#endif -#ifdef DEBUG_SU_NBYTES - printf("seg %" PRIu32 " -= %d for ino %d lbn %" PRId64 - " db 0x%" PRIx64 "\n", - dtosn(fs, daddr), osize, - VTOI(sp->vp)->i_number, lbn, daddr); -#endif - sup->su_nbytes -= osize; - if (!(bp->b_flags & B_GATHERED)) - fs->lfs_flags |= LFS_IFDIRTY; - error = LFS_BWRITE_LOG(bp); /* Ifile */ + KASSERT(lbn >= 0 || sbp->b_bcount == fs->lfs_bsize); + for (bytesleft = sbp->b_bcount; bytesleft > 0; + bytesleft -= fs->lfs_bsize) { + size = MIN(bytesleft, fs->lfs_bsize); + bb = fragstofsb(fs, numfrags(fs, size)); + lfs_update_single(fs, sp, lbn, fs->lfs_offset, + size, num); + fs->lfs_offset += bb; + ++lbn; } - /* - * Now that this block has a new address, and its old - * segment no longer owns it, we can forget about its - * old size. 
- */ - if (lbn >= 0 && lbn < NDADDR) - ip->i_lfs_fragsize[lbn] = sbp->b_bcount; + } } @@ -1347,8 +1435,10 @@ lfs_initseg(struct lfs *fs) lfs_newseg(fs); repeat = 1; fs->lfs_offset = fs->lfs_curseg; + sp->seg_number = dtosn(fs, fs->lfs_curseg); sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg); + /* * If the segment contains a superblock, update the offset * and summary address to skip over it. @@ -1382,15 +1472,15 @@ lfs_initseg(struct lfs *fs) sp->cbpp = sp->bpp; #ifdef LFS_MALLOC_SUMMARY sbp = *sp->cbpp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp, - fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize); + fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize, LFS_NB_SUMMARY); sp->segsum = (*sp->cbpp)->b_data; #else sbp = *sp->cbpp = getblk(VTOI(fs->lfs_ivnode)->i_devvp, fsbtodb(fs, fs->lfs_offset), NBPG, 0, 0); - memset(sbp->b_data, 0x5a, NBPG); + /* memset(sbp->b_data, 0x5a, NBPG); */ sp->segsum = (*sp->cbpp)->b_data + NBPG - fs->lfs_sumsize; #endif - bzero(sp->segsum, fs->lfs_sumsize); + memset(sp->segsum, 0, fs->lfs_sumsize); sp->start_bpp = ++sp->cbpp; fs->lfs_offset += btofsb(fs, fs->lfs_sumsize); @@ -1436,14 +1526,14 @@ lfs_newseg(struct lfs *fs) sup->su_nbytes = 0; sup->su_nsums = 0; sup->su_ninos = 0; - (void) LFS_BWRITE_LOG(bp); /* Ifile */ + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); LFS_CLEANERINFO(cip, fs, bp); --cip->clean; ++cip->dirty; fs->lfs_nclean = cip->clean; LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); - + fs->lfs_lastseg = fs->lfs_curseg; fs->lfs_curseg = fs->lfs_nextseg; for (sn = curseg = dtosn(fs, fs->lfs_curseg) + fs->lfs_interleave;;) { @@ -1452,7 +1542,12 @@ lfs_newseg(struct lfs *fs) panic("lfs_nextseg: no clean segments"); LFS_SEGENTRY(sup, fs, sn, bp); isdirty = sup->su_flags & SEGUSE_DIRTY; - brelse(bp); + /* Check SEGUSE_EMPTY as we go along */ + if (isdirty && sup->su_nbytes == 0 && !(sup->su_flags & SEGUSE_EMPTY)) + LFS_WRITESEGENTRY(sup, fs, sn, bp); + else + brelse(bp); + if (!isdirty) break; } @@ -1478,7 +1573,7 @@ 
lookahead_pagemove(struct buf **bpp, int nblocks, size_t *size) return bpp; #else while((bp = *bpp) != NULL && *size < maxsize && nblocks--) { - if(bp->b_flags & B_CALL) + if(LFS_IS_MALLOC_BUF(bp)) return bpp; if(bp->b_bcount % NBPG) return bpp; @@ -1503,6 +1598,8 @@ extern LIST_HEAD(bufhashhdr, buf) invalhash; #define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) #define bremhash(bp) LIST_REMOVE(bp, b_hash) +extern int maxbpp; + static struct buf * lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, int n) { @@ -1510,8 +1607,8 @@ lfs_newclusterbuf(struct lfs *fs, struct vnode *vp, daddr_t addr, int n) struct buf **bpp, *bp; int s; - cl = (struct lfs_cluster *)malloc(sizeof(*cl), M_SEGMENT, M_WAITOK); - bpp = (struct buf **)malloc(n*sizeof(*bpp), M_SEGMENT, M_WAITOK); + cl = (struct lfs_cluster *)pool_get(&fs->lfs_clpool, PR_WAITOK); + bpp = (struct buf **)pool_get(&fs->lfs_bpppool, PR_WAITOK); memset(cl, 0, sizeof(*cl)); cl->fs = fs; cl->bpp = bpp; @@ -1575,7 +1672,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) SEGSUM *ssp; dev_t i_dev; char *datap, *dp; - int do_again, i, nblocks, s; + int i, s; + int do_again, nblocks, byteoffset; size_t el_size; struct lfs_cluster *cl; int (*strategy)(void *); @@ -1606,6 +1704,11 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) if ((nblocks = sp->cbpp - sp->bpp) == 1) return (0); +#if 0 + printf("lfs_writeseg: %d blocks at 0x%x\n", nblocks, + dbtofsb(fs, sp->bpp[0]->b_blkno)); +#endif + i_dev = VTOI(fs->lfs_ivnode)->i_dev; devvp = VTOI(fs->lfs_ivnode)->i_devvp; @@ -1646,7 +1749,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) fs->lfs_avail -= btofsb(fs, fs->lfs_sumsize); do_again = !(bp->b_flags & B_GATHERED); - (void)LFS_BWRITE_LOG(bp); /* Ifile */ + LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ + /* * Mark blocks B_BUSY, to prevent then from being changed between * the checksum computation and the actual write. 
@@ -1657,9 +1761,11 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) */ for (bpp = sp->bpp, i = nblocks - 1; i--;) { ++bpp; - if ((*bpp)->b_flags & B_CALL) - continue; bp = *bpp; + if (bp->b_flags & B_CALL) { /* UBC or malloced buffer */ + bp->b_flags |= B_BUSY; + continue; + } again: s = splbio(); if (bp->b_flags & B_BUSY) { @@ -1675,7 +1781,10 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) } bp->b_flags |= B_BUSY; splx(s); - /* Check and replace indirect block UNWRITTEN bogosity */ + /* + * Check and replace indirect block UNWRITTEN bogosity. + * XXX See comment in lfs_writefile. + */ if (bp->b_lblkno < 0 && bp->b_vp != devvp && bp->b_vp && VTOI(bp->b_vp)->i_ffs_blocks != VTOI(bp->b_vp)->i_lfs_effnblks) { @@ -1687,11 +1796,10 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) #endif /* Make a copy we'll make changes to */ newbp = lfs_newbuf(fs, bp->b_vp, bp->b_lblkno, - bp->b_bcount); + bp->b_bcount, LFS_NB_IBLOCK); newbp->b_blkno = bp->b_blkno; memcpy(newbp->b_data, bp->b_data, newbp->b_bcount); - *bpp = newbp; changed = 0; /* XXX ondisk32 */ @@ -1699,10 +1807,32 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) daddrp < (int32_t *)(newbp->b_data + newbp->b_bcount); daddrp++) { if (*daddrp == UNWRITTEN) { - ++changed; #ifdef DEBUG_LFS - printf("lfs_writeseg: replacing UNWRITTEN\n"); -#endif + off_t doff; + int32_t ioff; + + ioff = daddrp - (int32_t *)(newbp->b_data); + doff = (-bp->b_lblkno + ioff) * fs->lfs_bsize; + printf("ino %d lbn %" PRId64 " entry %d off %" PRIx64 "\n", + VTOI(bp->b_vp)->i_number, + bp->b_lblkno, ioff, doff); +# ifdef LFS_UBC + if (bp->b_vp->v_type == VREG) { + /* + * What is up with this page? 
+ */ + struct vm_page *pg; + for (; doff / fs->lfs_bsize == (-bp->b_lblkno + ioff); doff += PAGE_SIZE) { + pg = uvm_pagelookup(&bp->b_vp->v_uobj, doff); + if (pg == NULL) + printf(" page at %" PRIx64 " is NULL\n", doff); + else + printf(" page at %" PRIx64 " flags 0x%x pqflags 0x%x\n", doff, pg->flags, pg->pqflags); + } + } +# endif /* LFS_UBC */ +#endif /* DEBUG_LFS */ + ++changed; *daddrp = 0; } } @@ -1711,9 +1841,18 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) * though, if it still has dirty data on it. */ if (changed) { - bp->b_flags &= ~(B_ERROR | B_GATHERED); +#ifdef DEBUG_LFS + printf("lfs_writeseg: replacing UNWRITTEN(%d):" + " bp = %p newbp = %p\n", changed, bp, + newbp); +#endif + *bpp = newbp; + bp->b_flags &= ~(B_ERROR | B_GATHERED | B_DONE); if (bp->b_flags & B_CALL) { - lfs_freebuf(bp); + printf("lfs_writeseg: indir bp should not be B_CALL\n"); + s = splbio(); + biodone(bp); + splx(s); bp = NULL; } else { /* Still on free list, leave it there */ @@ -1731,22 +1870,8 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) fs->lfs_avail -= btofsb(fs, bp->b_bcount); } } else { - bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI | - B_GATHERED); - if (bp->b_flags & B_CALL) { - lfs_freebuf(bp); - bp = NULL; - } else { - bremfree(bp); - bp->b_flags |= B_DONE; - s = splbio(); - reassignbuf(bp, bp->b_vp); - splx(s); - LFS_UNLOCK_BUF(bp); - brelse(bp); - } + lfs_freebuf(fs, newbp); } - } } /* @@ -1757,21 +1882,31 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) * XXX * Fix this to do it inline, instead of malloc/copy. 
*/ + datap = dp = pool_get(&fs->lfs_bpppool, PR_WAITOK); if (fs->lfs_version == 1) el_size = sizeof(u_long); else el_size = sizeof(u_int32_t); - datap = dp = malloc(nblocks * el_size, M_SEGMENT, M_WAITOK); - for (bpp = sp->bpp, i = nblocks - 1; i--;) { - if (((*++bpp)->b_flags & (B_CALL|B_INVAL)) == (B_CALL|B_INVAL)) { - if (copyin((*bpp)->b_saveaddr, dp, el_size)) - panic("lfs_writeseg: copyin failed [1]: " - "ino %d blk %" PRId64, - VTOI((*bpp)->b_vp)->i_number, - (*bpp)->b_lblkno); - } else - memcpy(dp, (*bpp)->b_data, el_size); - dp += el_size; + for (bpp = sp->bpp, i = nblocks - 1; i--; ) { + ++bpp; + /* Loop through gop_write cluster blocks */ + for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; + byteoffset += fs->lfs_bsize) { + if (((*bpp)->b_flags & (B_CALL | B_INVAL)) == + (B_CALL | B_INVAL)) { + if (copyin((caddr_t)(*bpp)->b_saveaddr + + byteoffset, dp, el_size)) { + panic("lfs_writeseg: copyin failed [1]: " + "ino %d blk %" PRId64, + VTOI((*bpp)->b_vp)->i_number, + (*bpp)->b_lblkno); + } + } else { + memcpy(dp, (*bpp)->b_data + byteoffset, + el_size); + } + dp += el_size; + } } if (fs->lfs_version == 1) ssp->ss_ocreate = time.tv_sec; @@ -1787,7 +1922,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size); ssp->ss_sumsum = cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum)); - free(datap, M_SEGMENT); + pool_put(&fs->lfs_bpppool, datap); datap = dp = NULL; #ifdef DIAGNOSTIC if (fs->lfs_bfree < btofsb(fs, ninos * fs->lfs_ibsize) + btofsb(fs, fs->lfs_sumsize)) @@ -1854,7 +1989,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) if(use_pagemove == 0) { cl->flags |= LFS_CL_MALLOC; cl->olddata = cbp->b_data; - cbp->b_data = malloc(CHUNKSIZE, M_SEGMENT, M_WAITOK); + cbp->b_data = lfs_malloc(fs, CHUNKSIZE, LFS_NB_CLUSTER); } #if defined(DEBUG) && defined(DIAGNOSTIC) if(dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno + btodb((*bpp)->b_bcount - 1))) != @@ -1870,12 +2005,6 @@ lfs_writeseg(struct lfs *fs, 
struct segment *sp) /* * Construct the cluster. */ - while (fs->lfs_iocount >= LFS_THROTTLE) { -#ifdef DEBUG_LFS - printf("[%d]", fs->lfs_iocount); -#endif - tsleep(&fs->lfs_iocount, PRIBIO+1, "lfs_throttle", 0); - } ++fs->lfs_iocount; for (p = cbp->b_data; i && cbp->b_bcount < CHUNKSIZE; i--) { @@ -1884,6 +2013,17 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) if (bp->b_bcount > (CHUNKSIZE - cbp->b_bcount)) break; +#ifdef DIAGNOSTIC + if (dtosn(fs, dbtofsb(fs, bp->b_blkno + + btodb(bp->b_bcount - 1))) != + sp->seg_number) { + printf("blk size %ld daddr %" PRIx64 " not in seg %d\n", + bp->b_bcount, bp->b_blkno, + sp->seg_number); + panic("segment overwrite"); + } +#endif + /* * Fake buffers from the cleaner are marked as B_INVAL. * We need to copy the data from user space rather than @@ -1939,7 +2079,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) bp->b_flags &= ~(B_DELWRI | B_READ | B_ERROR); #ifdef LFS_MNOBUSY if (cl->flags & LFS_CL_MALLOC) { - if (!(bp->b_flags & B_CALL)) + if (!LFS_IS_MALLOC_BUF(bp))) brelse(bp); /* Still B_LOCKED */ } #endif @@ -1966,7 +2106,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) printf("lfs_writeseg: marking ino %d\n", ip->i_number); #endif - if (bp->b_flags & B_CALL) + if (LFS_IS_MALLOC_BUF(bp)) LFS_SET_UINO(ip, IN_CLEANING); else LFS_SET_UINO(ip, IN_MODIFIED); @@ -1980,7 +2120,7 @@ lfs_writeseg(struct lfs *fs, struct segment *sp) /* * In order to include the summary in a clustered block, * it may be necessary to shift the block forward (since - * summary blocks are in generay smaller than can be + * summary blocks are in general smaller than can be * addressed by pagemove(). After the write, the block * will be corrected before disassembly. */ @@ -2036,7 +2176,8 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr) /* Checksum the superblock and copy it into a buffer. 
*/ fs->lfs_cksum = lfs_sb_cksum(&(fs->lfs_dlfs)); - bp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp, fsbtodb(fs, daddr), LFS_SBPAD); + bp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp, fsbtodb(fs, daddr), LFS_SBPAD, LFS_NB_SBLOCK); + memset(bp->b_data + sizeof(struct dlfs), 0, LFS_SBPAD - sizeof(struct dlfs)); *(struct dlfs *)bp->b_data = fs->lfs_dlfs; bp->b_dev = i_dev; @@ -2062,9 +2203,17 @@ lfs_writesuper(struct lfs *fs, daddr_t daddr) int lfs_match_fake(struct lfs *fs, struct buf *bp) { - return (bp->b_flags & B_CALL); + return LFS_IS_MALLOC_BUF(bp); } +#if 0 +int +lfs_match_real(struct lfs *fs, struct buf *bp) +{ + return (lfs_match_data(fs, bp) && !lfs_match_fake(fs, bp)); +} +#endif + int lfs_match_data(struct lfs *fs, struct buf *bp) { @@ -2108,9 +2257,10 @@ lfs_match_tindir(struct lfs *fs, struct buf *bp) void lfs_callback(struct buf *bp) { - /* struct lfs *fs; */ - /* fs = (struct lfs *)bp->b_saveaddr; */ - lfs_freebuf(bp); + struct lfs *fs; + + fs = (struct lfs *)bp->b_saveaddr; + lfs_freebuf(fs, bp); } static void @@ -2121,9 +2271,9 @@ lfs_super_aiodone(struct buf *bp) fs = (struct lfs *)bp->b_saveaddr; fs->lfs_sbactive = 0; wakeup(&fs->lfs_sbactive); - if (--fs->lfs_iocount < LFS_THROTTLE) + if (--fs->lfs_iocount == 0) wakeup(&fs->lfs_iocount); - lfs_freebuf(bp); + lfs_freebuf(fs, bp); } static void @@ -2132,7 +2282,7 @@ lfs_cluster_aiodone(struct buf *bp) struct lfs_cluster *cl; struct lfs *fs; struct buf *tbp; - struct vnode *vp; + struct vnode *vp, *devvp; int s, error=0; char *cp; extern int locked_queue_count; @@ -2143,6 +2293,7 @@ lfs_cluster_aiodone(struct buf *bp) cl = (struct lfs_cluster *)bp->b_saveaddr; fs = cl->fs; + devvp = VTOI(fs->lfs_ivnode)->i_devvp; bp->b_saveaddr = cl->saveaddr; /* If shifted, shift back now */ @@ -2172,13 +2323,19 @@ lfs_cluster_aiodone(struct buf *bp) * the cluster was written, free it. Otherwise, keep it on * the locked list to be written again. 
*/ + vp = tbp->b_vp; if ((tbp->b_flags & (B_LOCKED | B_DELWRI)) == B_LOCKED) LFS_UNLOCK_BUF(tbp); +#if 0 + else if (vp != devvp) + printf("dirtied while busy?! bp %p, ino %d, lbn %d\n", + tbp, vp ? VTOI(vp)->i_number : -1, + tbp->b_lblkno); +#endif tbp->b_flags &= ~B_GATHERED; LFS_BCLEAN_LOG(fs, tbp); - vp = tbp->b_vp; /* Segment summary for a shifted cluster */ if(!cl->bufcount && (cl->flags & LFS_CL_SHIFT)) tbp->b_flags |= B_INVAL; @@ -2197,7 +2354,30 @@ lfs_cluster_aiodone(struct buf *bp) } #endif if (tbp->b_flags & (B_BUSY | B_CALL)) { + if ((tbp->b_flags & B_CALL) && !LFS_IS_MALLOC_BUF(tbp)) { + /* printf("flags 0x%lx\n", tbp->b_flags); */ + /* + * A buffer from the page daemon. + * We use the same iodone as it does, + * so we must manually disassociate its + * buffers from the vp. + */ + if (tbp->b_vp) { + /* This is just silly */ + s = splbio(); + brelvp(tbp); + tbp->b_vp = vp; + splx(s); + } + /* Put it back the way it was */ + tbp->b_flags |= B_ASYNC; + /* Master buffers have B_AGE */ + if (tbp->b_private == tbp) + tbp->b_flags |= B_AGE; + } + s = splbio(); biodone(tbp); + splx(s); } } @@ -2209,7 +2389,7 @@ lfs_cluster_aiodone(struct buf *bp) (char *)bp->b_data, bp->b_bufsize); } if(cl->flags & LFS_CL_MALLOC) { - free(bp->b_data, M_SEGMENT); + lfs_free(fs, bp->b_data, LFS_NB_CLUSTER); bp->b_data = cl->olddata; } bp->b_bcount = 0; @@ -2231,23 +2411,12 @@ lfs_cluster_aiodone(struct buf *bp) if (fs->lfs_iocount == 0) panic("lfs_cluster_aiodone: zero iocount"); #endif - if (--fs->lfs_iocount < LFS_THROTTLE) + if (--fs->lfs_iocount == 0) wakeup(&fs->lfs_iocount); -#if 0 - if (fs->lfs_iocount == 0) { - /* - * Vinvalbuf can move locked buffers off the locked queue - * and we have no way of knowing about this. So, after - * doing a big write, we recalculate how many buffers are - * really still left on the locked queue. 
- */ - lfs_countlocked(&locked_queue_count, &locked_queue_bytes, "lfs_cluster_callback"); - wakeup(&locked_queue_count); - } -#endif - free(cl->bpp, M_SEGMENT); - free(cl, M_SEGMENT); + pool_put(&fs->lfs_bpppool, cl->bpp); + cl->bpp = NULL; + pool_put(&fs->lfs_clpool, cl); } static void @@ -2294,15 +2463,16 @@ lfs_shellsort(struct buf **bp_array, int32_t *lb_array, int nmemb) static int __rsshell_increments[] = { 4, 1, 0 }; int incr, *incrp, t1, t2; struct buf *bp_temp; - u_long lb_temp; + u_int32_t lbt, *lba; + lba = (u_int32_t *)lb_array; for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) for (t1 = incr; t1 < nmemb; ++t1) for (t2 = t1 - incr; t2 >= 0;) - if (lb_array[t2] > lb_array[t2 + incr]) { - lb_temp = lb_array[t2]; - lb_array[t2] = lb_array[t2 + incr]; - lb_array[t2 + incr] = lb_temp; + if (lba[t2] > lba[t2 + incr]) { + lbt = lba[t2]; + lba[t2] = lba[t2 + incr]; + lba[t2 + incr] = lbt; bp_temp = bp_array[t2]; bp_array[t2] = bp_array[t2 + incr]; bp_array[t2 + incr] = bp_temp; diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c index 8c7d0d0070db..bb146df9eb7b 100644 --- a/sys/ufs/lfs/lfs_subr.c +++ b/sys/ufs/lfs/lfs_subr.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_subr.c,v 1.30 2003/01/29 13:14:35 yamt Exp $ */ +/* $NetBSD: lfs_subr.c,v 1.31 2003/02/17 23:48:20 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.30 2003/01/29 13:14:35 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.31 2003/02/17 23:48:20 perseant Exp $"); #include #include @@ -86,6 +86,8 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.30 2003/01/29 13:14:35 yamt Exp $"); #include #include +#include + /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". 
If "res" is non-zero, fill it in with a pointer to the @@ -122,12 +124,177 @@ lfs_blkatoff(void *v) return (0); } +#ifdef LFS_DEBUG_MALLOC +char *lfs_res_names[LFS_NB_COUNT] = { + "summary", + "superblock", + "ifile block", + "cluster", + "clean", +}; +#endif + +int lfs_res_qty[LFS_NB_COUNT] = { + LFS_N_SUMMARIES, + LFS_N_SBLOCKS, + LFS_N_IBLOCKS, + LFS_N_CLUSTERS, + LFS_N_CLEAN, +}; + +void +lfs_setup_resblks(struct lfs *fs) +{ + int i, j; + int maxbpp; + + fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT, + M_WAITOK); + for (i = 0; i < LFS_N_TOTAL; i++) { + fs->lfs_resblk[i].inuse = 0; + fs->lfs_resblk[i].p = NULL; + } + for (i = 0; i < LFS_RESHASH_WIDTH; i++) + LIST_INIT(fs->lfs_reshash + i); + + /* + * These types of allocations can be larger than a page, + * so we can't use the pool subsystem for them. + */ + for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++) + fs->lfs_resblk[i].p = malloc(fs->lfs_sumsize, M_SEGMENT, + M_WAITOK); + for (j = 0; j < LFS_N_SBLOCKS; j++, i++) + fs->lfs_resblk[i].p = malloc(LFS_SBPAD, M_SEGMENT, M_WAITOK); + for (j = 0; j < LFS_N_IBLOCKS; j++, i++) + fs->lfs_resblk[i].p = malloc(fs->lfs_bsize, M_SEGMENT, M_WAITOK); + for (j = 0; j < LFS_N_CLUSTERS; j++, i++) + fs->lfs_resblk[i].p = malloc(MAXPHYS, M_SEGMENT, M_WAITOK); + for (j = 0; j < LFS_N_CLEAN; j++, i++) + fs->lfs_resblk[i].p = malloc(MAXPHYS, M_SEGMENT, M_WAITOK); + + /* + * Initialize pools for small types (XXX is BPP small?) 
+ */ + maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2); + maxbpp = MIN(maxbpp, fs->lfs_ssize / fs->lfs_fsize + 2); + pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, + LFS_N_BPP, "lfsbpppl", &pool_allocator_nointr); + pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, + LFS_N_CL, "lfsclpl", &pool_allocator_nointr); + pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, + LFS_N_SEG, "lfssegpool", &pool_allocator_nointr); +} + +void +lfs_free_resblks(struct lfs *fs) +{ + int i; + + pool_destroy(&fs->lfs_bpppool); + pool_destroy(&fs->lfs_segpool); + pool_destroy(&fs->lfs_clpool); + + for (i = 0; i < LFS_N_TOTAL; i++) { + while(fs->lfs_resblk[i].inuse) + tsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0); + if (fs->lfs_resblk[i].p != NULL) + free(fs->lfs_resblk[i].p, M_SEGMENT); + } + free(fs->lfs_resblk, M_SEGMENT); +} + +static unsigned int +lfs_mhash(void *vp) +{ + return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH; +} + +/* + * Return memory of the given size for the given purpose, or use one of a + * number of spare last-resort buffers, if malloc returns NULL. + */ +void * +lfs_malloc(struct lfs *fs, size_t size, int type) +{ + struct lfs_res_blk *re; + void *r; + int i, s, start; + unsigned int h; + + /* If no mem allocated for this type, it just waits */ + if (lfs_res_qty[type] == 0) + return malloc(size, M_SEGMENT, M_WAITOK); + + /* Otherwise try a quick malloc, and if it works, great */ + if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) + return r; + + /* + * If malloc returned NULL, we are forced to use one of our + * reserve blocks. We have on hand at least one summary block, + * at least one cluster block, at least one superblock, + * and several indirect blocks. 
+ */ + /* skip over blocks of other types */ + for (i = 0, start = 0; i < type; i++) + start += lfs_res_qty[i]; + while (r == NULL) { + for (i = 0; i < lfs_res_qty[type]; i++) { + if (fs->lfs_resblk[start + i].inuse == 0) { + re = fs->lfs_resblk + start + i; + re->inuse = 1; + r = re->p; + h = lfs_mhash(r); + s = splbio(); + LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res); + splx(s); + return r; + } + } +#ifdef LFS_DEBUG_MALLOC + printf("sleeping on %s (%d)\n", lfs_res_names[type], lfs_res_qty[type]); +#endif + tsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0); +#ifdef LFS_DEBUG_MALLOC + printf("done sleeping on %s\n", lfs_res_names[type]); +#endif + } + /* NOTREACHED */ + return r; +} + +void +lfs_free(struct lfs *fs, void *p, int type) +{ + int s; + unsigned int h; + res_t *re; + + h = lfs_mhash(p); + s = splbio(); + LIST_FOREACH(re, &fs->lfs_reshash[h], res) { + if (re->p == p) { + LIST_REMOVE(re, res); + re->inuse = 0; + wakeup(&fs->lfs_resblk); + splx(s); + return; + } + } + splx(s); + + /* + * If we didn't find it, free it. + */ + free(p, M_SEGMENT); +} /* * lfs_seglock -- * Single thread the segment writer. 
*/ -void +int lfs_seglock(struct lfs *fs, unsigned long flags) { struct segment *sp; @@ -136,8 +303,10 @@ lfs_seglock(struct lfs *fs, unsigned long flags) if (fs->lfs_lockpid == curproc->p_pid) { ++fs->lfs_seglock; fs->lfs_sp->seg_flags |= flags; - return; - } else while (fs->lfs_seglock) + return 0; + } else if (flags & SEGM_PAGEDAEMON) + return EWOULDBLOCK; + else while (fs->lfs_seglock) (void)tsleep(&fs->lfs_seglock, PRIBIO + 1, "lfs seglock", 0); } @@ -148,10 +317,8 @@ lfs_seglock(struct lfs *fs, unsigned long flags) /* Drain fragment size changes out */ lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0); - sp = fs->lfs_sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); - sp->bpp = malloc(((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / - sizeof(int32_t) + 1) * sizeof(struct buf *), - M_SEGMENT, M_WAITOK); + sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK); + sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK); sp->seg_flags = flags; sp->vp = NULL; sp->seg_iocount = 0; @@ -164,8 +331,70 @@ lfs_seglock(struct lfs *fs, unsigned long flags) * the writes we intend to do. 
*/ ++fs->lfs_iocount; + return 0; } +static void lfs_unmark_dirop(struct lfs *); + +static void +lfs_unmark_dirop(struct lfs *fs) +{ + struct inode *ip, *nip; + struct vnode *vp; + extern int lfs_dirvcount; + + for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { + nip = TAILQ_NEXT(ip, i_lfs_dchain); + vp = ITOV(ip); + + if (VOP_ISLOCKED(vp) && + vp->v_lock.lk_lockholder != curproc->p_pid) { + continue; + } + if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) { + --lfs_dirvcount; + vp->v_flag &= ~VDIROP; + TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain); + wakeup(&lfs_dirvcount); + fs->lfs_unlockvp = vp; + vrele(vp); + fs->lfs_unlockvp = NULL; + } + } +} + +#ifndef LFS_NO_AUTO_SEGCLEAN +static void +lfs_auto_segclean(struct lfs *fs) +{ + int i, error; + + /* + * Now that we've swapped lfs_activesb, but while we still + * hold the segment lock, run through the segment list marking + * the empty ones clean. + * XXX - do we really need to do them all at once? + */ + for (i = 0; i < fs->lfs_nseg; i++) { + if ((fs->lfs_suflags[0][i] & + (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == + (SEGUSE_DIRTY | SEGUSE_EMPTY) && + (fs->lfs_suflags[1][i] & + (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) == + (SEGUSE_DIRTY | SEGUSE_EMPTY)) { + + if ((error = lfs_do_segclean(fs, i)) != 0) { +#ifdef DEBUG + printf("lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i); +#endif /* DEBUG */ + } + } + fs->lfs_suflags[1 - fs->lfs_activesb][i] = + fs->lfs_suflags[fs->lfs_activesb][i]; + } +} +#endif /* LFS_AUTO_SEGCLEAN */ + /* * lfs_segunlock -- * Single thread the segment writer. 
@@ -176,9 +405,6 @@ lfs_segunlock(struct lfs *fs) struct segment *sp; unsigned long sync, ckp; struct buf *bp; - struct vnode *vp, *nvp; - struct mount *mp; - extern int lfs_dirvcount; #ifdef LFS_MALLOC_SUMMARY extern int locked_queue_count; extern long locked_queue_bytes; @@ -186,63 +412,9 @@ lfs_segunlock(struct lfs *fs) sp = fs->lfs_sp; - if (fs->lfs_seglock == 1 && !(sp->seg_flags & SEGM_PROT)) { - - mp = fs->lfs_ivnode->v_mount; - /* - * Go through and unmark all DIROP vnodes, possibly - * calling VOP_INACTIVE (through vrele). This is - * delayed until now in order not to accidentally - * write a DIROP node through lfs_flush. - */ -#ifndef LFS_NO_BACKVP_HACK - /* BEGIN HACK */ -#define VN_OFFSET (((caddr_t)&LIST_NEXT(vp, v_mntvnodes)) - (caddr_t)vp) -#define BACK_VP(VP) ((struct vnode *)(((caddr_t)(VP)->v_mntvnodes.le_prev) - VN_OFFSET)) -#define BEG_OF_VLIST ((struct vnode *)(((caddr_t)&LIST_FIRST(&mp->mnt_vnodelist)) - VN_OFFSET)) - - /* Find last vnode. */ - loop: for (vp = LIST_FIRST(&mp->mnt_vnodelist); - vp && LIST_NEXT(vp, v_mntvnodes) != NULL; - vp = LIST_NEXT(vp, v_mntvnodes)); - for (; vp && vp != BEG_OF_VLIST; vp = nvp) { - nvp = BACK_VP(vp); -#else - loop: - for (vp = LIST_FIRST(&mp->mnt_vnodelist); - vp != NULL; - vp = nvp) { - nvp = LIST_NEXT(vp, v_mntvnodes); -#endif - if (vp->v_mount != mp) { - printf("lfs_segunlock: starting over\n"); - goto loop; - } - if (vp->v_type == VNON) - continue; - if (lfs_vref(vp)) - continue; - if (VOP_ISLOCKED(vp) && - vp->v_lock.lk_lockholder != curproc->p_pid) { - lfs_vunref(vp); - continue; - } - if ((vp->v_flag & VDIROP) && - !(VTOI(vp)->i_flag & IN_ADIROP)) { - --lfs_dirvcount; - vp->v_flag &= ~VDIROP; - wakeup(&lfs_dirvcount); - fs->lfs_unlockvp = vp; - lfs_vunref(vp); - vrele(vp); - fs->lfs_unlockvp = NULL; - } else { - lfs_vunref(vp); - } - } - } - if (fs->lfs_seglock == 1) { + if ((sp->seg_flags & SEGM_PROT) == 0) + lfs_unmark_dirop(fs); sync = sp->seg_flags & SEGM_SYNC; ckp = sp->seg_flags & SEGM_CKP; if 
(sp->bpp != sp->cbpp) { @@ -250,7 +422,7 @@ lfs_segunlock(struct lfs *fs) fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); bp = *sp->bpp; #ifdef LFS_MALLOC_SUMMARY - lfs_freebuf(bp); + lfs_freebuf(fs, bp); #else s = splbio(); bremfree(bp); @@ -263,11 +435,11 @@ lfs_segunlock(struct lfs *fs) } else printf ("unlock to 0 with no summary"); - free(sp->bpp, M_SEGMENT); + pool_put(&fs->lfs_bpppool, sp->bpp); sp->bpp = NULL; /* The sync case holds a reference in `sp' to be freed below */ if (!sync) - free(sp, M_SEGMENT); + pool_put(&fs->lfs_segpool, sp); fs->lfs_sp = NULL; /* @@ -275,9 +447,7 @@ lfs_segunlock(struct lfs *fs) * At the moment, the user's process hangs around so we can * sleep. */ - if (--fs->lfs_iocount < LFS_THROTTLE) - wakeup(&fs->lfs_iocount); - if(fs->lfs_iocount == 0) { + if (--fs->lfs_iocount == 0) { lfs_countlocked(&locked_queue_count, &locked_queue_bytes, "lfs_segunlock"); wakeup(&locked_queue_count); @@ -309,15 +479,18 @@ lfs_segunlock(struct lfs *fs) /* printf("sleeping on iocount %x == %d\n", sp, sp->seg_iocount); */ } if (sync) - free(sp, M_SEGMENT); + pool_put(&fs->lfs_segpool, sp); if (ckp) { fs->lfs_nactive = 0; /* If we *know* everything's on disk, write both sbs */ + /* XXX should wait for this one */ if (sync) - lfs_writesuper(fs,fs->lfs_sboffs[fs->lfs_activesb]); + lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]); + lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]); +#ifndef LFS_NO_AUTO_SEGCLEAN + lfs_auto_segclean(fs); +#endif fs->lfs_activesb = 1 - fs->lfs_activesb; - lfs_writesuper(fs,fs->lfs_sboffs[fs->lfs_activesb]); - --fs->lfs_seglock; fs->lfs_lockpid = 0; wakeup(&fs->lfs_seglock); diff --git a/sys/ufs/lfs/lfs_syscalls.c b/sys/ufs/lfs/lfs_syscalls.c index 77e157bf1c1c..37b8e5b52c8d 100644 --- a/sys/ufs/lfs/lfs_syscalls.c +++ b/sys/ufs/lfs/lfs_syscalls.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_syscalls.c,v 1.79 2003/01/24 21:55:28 fvdl Exp $ */ +/* $NetBSD: lfs_syscalls.c,v 1.80 2003/02/17 23:48:20 perseant Exp $ */ /*- - * 
Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.79 2003/01/24 21:55:28 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.80 2003/02/17 23:48:20 perseant Exp $"); #define LFS /* for prototypes in syscallargs.h */ @@ -107,6 +107,9 @@ int verbose_debug = 0; pid_t lfs_cleaner_pid = 0; +extern int lfs_subsys_pages; +extern struct simplelock lfs_subsys_lock; + /* * Definitions for the buffer free lists. */ @@ -578,7 +581,7 @@ lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) s = splbio(); for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; bp = nbp) { nbp = bp->b_freelist.tqe_next; - if (bp->b_flags & B_CALL) { + if (LFS_IS_MALLOC_BUF(bp)) { if (bp->b_flags & B_BUSY) { /* not bloody likely */ bp->b_flags |= B_WANTED; tsleep(bp, PRIBIO+1, "markv", 0); @@ -878,15 +881,12 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval) syscallarg(fsid_t *) fsidp; syscallarg(u_long) segment; } */ *uap = v; - struct proc *p = l->l_proc; - CLEANERINFO *cip; - SEGUSE *sup; - struct buf *bp; - struct mount *mntp; struct lfs *fs; + struct mount *mntp; fsid_t fsid; int error; unsigned long segnum; + struct proc *p = l->l_proc; if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) return (error); @@ -899,39 +899,44 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval) fs = VFSTOUFS(mntp)->um_lfs; segnum = SCARG(uap, segment); - if (dtosn(fs, fs->lfs_curseg) == segnum) - return (EBUSY); - if ((error = vfs_busy(mntp, LK_NOWAIT, NULL)) != 0) return (error); -#ifdef LFS_AGGRESSIVE_SEGLOCK + lfs_seglock(fs, SEGM_PROT); -#endif + error = lfs_do_segclean(fs, segnum); + lfs_segunlock(fs); + vfs_unbusy(mntp); + return error; +} + +/* + * Actually mark the segment clean. 
+ * Must be called with the segment lock held. + */ +int +lfs_do_segclean(struct lfs *fs, unsigned long segnum) +{ + struct buf *bp; + CLEANERINFO *cip; + SEGUSE *sup; + + if (dtosn(fs, fs->lfs_curseg) == segnum) { + return (EBUSY); + } + LFS_SEGENTRY(sup, fs, segnum, bp); if (sup->su_nbytes) { printf("lfs_segclean: not cleaning segment %lu: %d live bytes\n", segnum, sup->su_nbytes); brelse(bp); -#ifdef LFS_AGGRESSIVE_SEGLOCK - lfs_segunlock(fs); -#endif - vfs_unbusy(mntp); return (EBUSY); } if (sup->su_flags & SEGUSE_ACTIVE) { brelse(bp); -#ifdef LFS_AGGRESSIVE_SEGLOCK - lfs_segunlock(fs); -#endif - vfs_unbusy(mntp); return (EBUSY); } if (!(sup->su_flags & SEGUSE_DIRTY)) { brelse(bp); -#ifdef LFS_AGGRESSIVE_SEGLOCK - lfs_segunlock(fs); -#endif - vfs_unbusy(mntp); return (EALREADY); } @@ -948,7 +953,7 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval) if (fs->lfs_dmeta < 0) fs->lfs_dmeta = 0; sup->su_flags &= ~SEGUSE_DIRTY; - (void) LFS_BWRITE_LOG(bp); + LFS_WRITESEGENTRY(sup, fs, segnum, bp); LFS_CLEANERINFO(cip, fs, bp); ++cip->clean; @@ -958,10 +963,6 @@ sys_lfs_segclean(struct lwp *l, void *v, register_t *retval) cip->avail = fs->lfs_avail - fs->lfs_ravail; (void) LFS_BWRITE_LOG(bp); wakeup(&fs->lfs_avail); -#ifdef LFS_AGGRESSIVE_SEGLOCK - lfs_segunlock(fs); -#endif - vfs_unbusy(mntp); return (0); } @@ -1228,6 +1229,7 @@ lfs_fakebuf_iodone(struct buf *bp) if (!(obp->b_flags & (B_DELWRI | B_DONE))) obp->b_flags |= B_INVAL; + bp->b_saveaddr = (caddr_t)(VTOI(obp->b_vp)->i_lfs); brelse(obp); lfs_callback(bp); } @@ -1256,11 +1258,10 @@ lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uadd if (obp == NULL) panic("lfs_fakebuf: getblk failed"); -#ifndef ALLOW_VFLUSH_CORRUPTION - bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size); + bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN); error = copyin(uaddr, bp->b_data, size); if (error) { - lfs_freebuf(bp); + lfs_freebuf(fs, bp); return NULL; } bp->b_saveaddr = obp; @@ 
-1272,11 +1273,6 @@ lfs_fakebuf(struct lfs *fs, struct vnode *vp, int lbn, size_t size, caddr_t uadd panic("lfs_fakebuf: gathered bp: %p, ino=%u, lbn=%d", bp, VTOI(vp)->i_number, lbn); #endif -#else - bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, 0); - bp->b_flags |= B_INVAL; - bp->b_saveaddr = uaddr; -#endif #if 0 bp->b_saveaddr = (caddr_t)fs; ++fs->lfs_iocount; diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c index 67103ae54917..4f4ed179bb83 100644 --- a/sys/ufs/lfs/lfs_vfsops.c +++ b/sys/ufs/lfs/lfs_vfsops.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $ */ +/* $NetBSD: lfs_vfsops.c,v 1.91 2003/02/17 23:48:21 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.91 2003/02/17 23:48:21 perseant Exp $"); #if defined(_KERNEL_OPT) #include "opt_quota.h" @@ -84,6 +84,7 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $") #include #include #include +#include #include #include #include @@ -105,14 +106,32 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.90 2003/01/29 13:14:36 yamt Exp $") #include #include +#include +#include +#include +#include + #include #include -int lfs_mountfs(struct vnode *, struct mount *, struct proc *); +#ifdef LFS_UBC +#include +#include +static int lfs_gop_write(struct vnode *, struct vm_page **, int, int); +#endif + +static int lfs_mountfs(struct vnode *, struct mount *, struct proc *); extern const struct vnodeopv_desc lfs_vnodeop_opv_desc; extern const struct vnodeopv_desc lfs_specop_opv_desc; extern const struct vnodeopv_desc lfs_fifoop_opv_desc; +extern int lfs_subsys_pages; +extern int locked_queue_count; 
+extern long locked_queue_bytes; +extern struct simplelock lfs_subsys_lock; + +int lfs_writer_daemon = 0; +int lfs_do_flush = 0; const struct vnodeopv_desc * const lfs_vnodeopv_descs[] = { &lfs_vnodeop_opv_desc, @@ -143,15 +162,95 @@ struct vfsops lfs_vfsops = { }; struct genfs_ops lfs_genfsops = { +#ifdef LFS_UBC + lfs_gop_size, + ufs_gop_alloc, + lfs_gop_write, +#else NULL, NULL, genfs_compat_gop_write, +#endif }; -struct pool lfs_inode_pool; +struct pool lfs_inode_pool, lfs_inoext_pool; -extern int locked_queue_count; -extern long locked_queue_bytes; +/* + * The writer daemon. UVM keeps track of how many dirty pages we are holding + * in lfs_subsys_pages; the daemon flushes the filesystem when this value + * crosses the (user-defined) threshold LFS_MAX_PAGES. + */ +static void +lfs_writerd(void *arg) +{ +#ifdef LFS_PD + struct mount *mp, *nmp; + struct lfs *fs; +#endif + + lfs_writer_daemon = curproc->p_pid; + + for (;;) { + tsleep(&lfs_writer_daemon, PVM, "lfswriter", 0); + +#ifdef LFS_PD + /* + * Look through the list of LFSs to see if any of them + * have requested pageouts. + */ + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; + mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS, + MFSNAMELEN) == 0) { + fs = ((struct ufsmount *)mp->mnt_data)->ufsmount_u.lfs; + if (fs->lfs_pdflush || + !TAILQ_EMPTY(&fs->lfs_pchainhd)) { + fs->lfs_pdflush = 0; + simple_unlock(&mountlist_slock); + lfs_flush_fs(fs, 0); + simple_lock(&mountlist_slock); + } + } + + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp); + } + simple_unlock(&mountlist_slock); +#endif /* LFS_PD */ + + /* + * If global state wants a flush, flush everything. 
+ */ + while (lfs_do_flush || locked_queue_count > LFS_MAX_BUFS || + locked_queue_bytes > LFS_MAX_BYTES || + lfs_subsys_pages > LFS_MAX_PAGES) { + +#ifdef DEBUG_LFS_FLUSH + if (lfs_do_flush) + printf("daemon: lfs_do_flush\n"); + if (locked_queue_count > LFS_MAX_BUFS) + printf("daemon: lqc = %d, max %d\n", + locked_queue_count, LFS_MAX_BUFS); + if (locked_queue_bytes > LFS_MAX_BYTES) + printf("daemon: lqb = %ld, max %d\n", + locked_queue_bytes, LFS_MAX_BYTES); + if (lfs_subsys_pages > LFS_MAX_PAGES) + printf("daemon: lssp = %d, max %d\n", + lfs_subsys_pages, LFS_MAX_PAGES); +#endif /* DEBUG_LFS_FLUSH */ + lfs_flush(NULL, 0); + lfs_do_flush = 0; + } + wakeup(&lfs_subsys_pages); + } + /* NOTREACHED */ +} /* * Initialize the filesystem, most work done by ufs_init. @@ -166,9 +265,12 @@ lfs_init() */ pool_init(&lfs_inode_pool, sizeof(struct inode), 0, 0, 0, "lfsinopl", &pool_allocator_nointr); + pool_init(&lfs_inoext_pool, sizeof(struct lfs_inode_ext), 8, 0, 0, + "lfsinoextpl", &pool_allocator_nointr); #ifdef DEBUG memset(lfs_log, 0, sizeof(lfs_log)); #endif + simple_lock_init(&lfs_subsys_lock); } void @@ -452,11 +554,11 @@ update_meta(struct lfs *fs, ino_t ino, int version, daddr_t lbn, } #endif sup->su_nbytes -= size; - LFS_BWRITE_LOG(bp); + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, odaddr)), bp); } LFS_SEGENTRY(sup, fs, dtosn(fs, ndaddr), bp); sup->su_nbytes += size; - LFS_BWRITE_LOG(bp); + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, ndaddr), bp); /* Fix this so it can be released */ /* ip->i_lfs_effnblks = ip->i_ffs_blocks; */ @@ -544,12 +646,16 @@ update_inoblk(struct lfs *fs, daddr_t offset, struct ucred *cred, LFS_SEGENTRY(sup, fs, dtosn(fs, daddr), ibp); sup->su_nbytes -= DINODE_SIZE; - LFS_BWRITE_LOG(ibp); + LFS_WRITESEGENTRY(sup, fs, + dtosn(fs, daddr), + ibp); } LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, dbp->b_blkno)), ibp); sup->su_nbytes += DINODE_SIZE; - LFS_BWRITE_LOG(ibp); + LFS_WRITESEGENTRY(sup, fs, + dtosn(fs, dbtofsb(fs, dbp->b_blkno)), + ibp); } } 
} @@ -969,7 +1075,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) fs->lfs_dirops = 0; fs->lfs_nadirop = 0; fs->lfs_seglock = 0; - lockinit(&fs->lfs_freelock, PINOD, "lfs_freelock", 0, 0); + fs->lfs_pdflush = 0; lockinit(&fs->lfs_fraglock, PINOD, "lfs_fraglock", 0, 0); /* Set the file system readonly/modify bits. */ @@ -985,6 +1091,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) mp->mnt_stat.f_iosize = fs->lfs_bsize; mp->mnt_maxsymlinklen = fs->lfs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; + mp->mnt_fs_bshift = fs->lfs_bshift; ump->um_flags = 0; ump->um_mountp = mp; ump->um_dev = dev; @@ -997,6 +1104,16 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) ump->um_quotas[i] = NULLVP; devvp->v_specmountpoint = mp; + /* Set up reserved memory for pageout */ + lfs_setup_resblks(fs); + /* Set up vdirop tailq */ + TAILQ_INIT(&fs->lfs_dchainhd); + /* and paging tailq */ + TAILQ_INIT(&fs->lfs_pchainhd); +#if 0 /* XXXDEBUG */ + fs->lfs_lastwrit = dbtofsb(fs, fs->lfs_offset - 1); +#endif + /* * We use the ifile vnode for almost every operation. Instead of * retrieving it from the hash table each time we retrieve it here, @@ -1012,6 +1129,32 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) fs->lfs_ivnode = vp; VREF(vp); + /* Set up segment usage flags for the autocleaner. 
*/ + fs->lfs_suflags = (u_int32_t **)malloc(2 * sizeof(u_int32_t *), + M_SEGMENT, M_WAITOK); + fs->lfs_suflags[0] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t), + M_SEGMENT, M_WAITOK); + fs->lfs_suflags[1] = (u_int32_t *)malloc(fs->lfs_nseg * sizeof(u_int32_t), + M_SEGMENT, M_WAITOK); + memset(fs->lfs_suflags[1], 0, fs->lfs_nseg * sizeof(u_int32_t)); + for (i = 0; i < fs->lfs_nseg; i++) { + LFS_SEGENTRY(sup, fs, i, bp); + if (!ronly && sup->su_nbytes == 0 && + !(sup->su_flags & SEGUSE_EMPTY)) { + sup->su_flags |= SEGUSE_EMPTY; + fs->lfs_suflags[0][i] = sup->su_flags; + LFS_WRITESEGENTRY(sup, fs, i, bp); + } else if (!ronly && !(sup->su_nbytes == 0) && + (sup->su_flags & SEGUSE_EMPTY)) { + sup->su_flags &= ~SEGUSE_EMPTY; + fs->lfs_suflags[0][i] = sup->su_flags; + LFS_WRITESEGENTRY(sup, fs, i, bp); + } else { + fs->lfs_suflags[0][i] = sup->su_flags; + brelse(bp); + } + } + /* * Roll forward. * @@ -1045,7 +1188,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) if (!(sup->su_flags & SEGUSE_DIRTY)) --fs->lfs_nclean; sup->su_flags |= SEGUSE_DIRTY; - (void) LFS_BWRITE_LOG(bp); + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, offset), bp); while ((offset = check_segsum(fs, offset, cred, CHECK_CKSUM, &flags, p)) > 0) { @@ -1055,7 +1198,8 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) if (!(sup->su_flags & SEGUSE_DIRTY)) --fs->lfs_nclean; sup->su_flags |= SEGUSE_DIRTY; - (void) LFS_BWRITE_LOG(bp); + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, oldoffset), + bp); } #ifdef DEBUG_LFS_RFW @@ -1149,7 +1293,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p) */ LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; - (void) LFS_BWRITE_LOG(bp); /* Ifile */ + LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_offset), bp); /* Ifile */ /* Now that roll-forward is done, unlock the Ifile */ vput(vp); @@ -1180,6 +1324,12 @@ out: free(ump, M_UFSMNT); mp->mnt_data = NULL; } + + /* Start the 
pagedaemon-anticipating daemon */ + if (lfs_writer_daemon == 0 && + kthread_create1(lfs_writerd, NULL, NULL, "lfs_writer") != 0) + panic("fork lfs_writer"); + return (error); } @@ -1259,12 +1409,18 @@ lfs_unmount(struct mount *mp, int mntflags, struct proc *p) ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vput(ump->um_devvp); - /* XXX KS - wake up the cleaner so it can die */ + /* wake up the cleaner so it can die */ wakeup(&fs->lfs_nextseg); wakeup(&lfs_allclean_wakeup); + /* Free per-mount data structures */ + free(fs->lfs_suflags[0], M_SEGMENT); + free(fs->lfs_suflags[1], M_SEGMENT); + free(fs->lfs_suflags, M_SEGMENT); + lfs_free_resblks(fs); free(fs, M_UFSMNT); free(ump, M_UFSMNT); + mp->mnt_data = NULL; mp->mnt_flag &= ~MNT_LOCAL; return (error); @@ -1586,11 +1742,251 @@ lfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, si if (lfs_dostats == 0) memset(&lfs_stats,0,sizeof(lfs_stats)); return 0; - case LFS_STATS: - return (sysctl_rdstruct(oldp, oldlenp, newp, - &lfs_stats, sizeof(lfs_stats))); default: return (EOPNOTSUPP); } /* NOTREACHED */ } + +#ifdef LFS_UBC +/* + * lfs_gop_write functions exactly like genfs_gop_write, except that + * (1) it requires the seglock to be held by its caller, and sp->fip + * to be properly initialized (it will return without re-initializing + * sp->fip, and without calling lfs_writeseg). + * (2) it uses the remaining space in the segment, rather than VOP_BMAP, + * to determine how large a block it can write at once (though it does + * still use VOP_BMAP to find holes in the file); + * (3) it calls lfs_gatherblock instead of VOP_STRATEGY on its blocks + * (leaving lfs_writeseg to deal with the cluster blocks, so we might + * now have clusters of clusters, ick.) 
+ */ +static int +lfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags) +{ + int i, s, error, run; + int fs_bshift, dev_bshift; + vaddr_t kva; + off_t eof, offset, startoffset; + size_t bytes, iobytes, skipbytes; + daddr_t lbn, blkno; + struct vm_page *pg; + struct buf *mbp, *bp; + struct vnode *devvp; + struct inode *ip = VTOI(vp); + struct lfs *fs = ip->i_lfs; + struct segment *sp = fs->lfs_sp; + UVMHIST_FUNC("lfs_gop_write"); UVMHIST_CALLED(ubchist); + + /* The Ifile lives in the buffer cache */ + if (vp == fs->lfs_ivnode) + return genfs_compat_gop_write(vp, pgs, npages, flags); + + /* + * Sometimes things slip past the filters in lfs_putpages, + * and the pagedaemon tries to write pages---problem is + * that the pagedaemon never acquires the segment lock. + * + * Unbusy and unclean the pages, and put them on the ACTIVE + * queue under the hypothesis that they couldn't have got here + * unless they were modified *quite* recently. + * + * XXXUBC that last statement is an oversimplification of course. 
+ */ + if (!(fs->lfs_seglock) || fs->lfs_lockpid != curproc->p_pid) { + simple_lock(&vp->v_interlock); +#ifdef DEBUG + printf("lfs_gop_write: seglock not held\n"); +#endif + uvm_lock_pageq(); + for (i = 0; i < npages; i++) { + if (pgs[i]->flags & PG_WANTED) + wakeup(pgs[i]); + if (pgs[i]->flags & PG_PAGEOUT) + uvmexp.paging--; + pgs[i]->flags &= ~(PG_BUSY|PG_CLEAN|PG_WANTED|PG_DELWRI|PG_PAGEOUT|PG_RELEASED); + UVM_PAGE_OWN(pg, NULL); + uvm_pageactivate(pgs[i]); + } + uvm_page_unbusy(pgs, npages); + uvm_unlock_pageq(); + simple_unlock(&vp->v_interlock); + return EAGAIN; + } + + UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x", + vp, pgs, npages, flags); + + GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_WRITE); + + if (vp->v_type == VREG) { + fs_bshift = vp->v_mount->mnt_fs_bshift; + dev_bshift = vp->v_mount->mnt_dev_bshift; + } else { + fs_bshift = DEV_BSHIFT; + dev_bshift = DEV_BSHIFT; + } + error = 0; + pg = pgs[0]; + startoffset = pg->offset; + bytes = MIN(npages << PAGE_SHIFT, eof - startoffset); + skipbytes = 0; + + KASSERT(bytes != 0); + + /* Swap PG_DELWRI for PG_PAGEOUT */ + for (i = 0; i < npages; i++) + if (pgs[i]->flags & PG_DELWRI) { + KASSERT(!(pgs[i]->flags & PG_PAGEOUT)); + pgs[i]->flags &= ~PG_DELWRI; + pgs[i]->flags |= PG_PAGEOUT; + uvmexp.paging++; + } + + /* + * Check to make sure we're starting on a block boundary. + * We'll check later to make sure we always write entire + * blocks (or fragments). 
+ */ + if (startoffset & fs->lfs_bmask) + printf("%" PRId64 " & %" PRId64 " = %" PRId64 "\n", + startoffset, fs->lfs_bmask, + startoffset & fs->lfs_bmask); + KASSERT((startoffset & fs->lfs_bmask) == 0); + if (bytes & fs->lfs_ffmask) { + printf("lfs_gop_write: asked to write %ld bytes\n", (long)bytes); + panic("lfs_gop_write: non-integer blocks"); + } + + kva = uvm_pagermapin(pgs, npages, + UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK); + + s = splbio(); + simple_lock(&global_v_numoutput_slock); + vp->v_numoutput += 2; /* one for biodone, one for aiodone */ + simple_unlock(&global_v_numoutput_slock); + mbp = pool_get(&bufpool, PR_WAITOK); + splx(s); + + memset(mbp, 0, sizeof(*bp)); + UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", + vp, mbp, vp->v_numoutput, bytes); + mbp->b_bufsize = npages << PAGE_SHIFT; + mbp->b_data = (void *)kva; + mbp->b_resid = mbp->b_bcount = bytes; + mbp->b_flags = B_BUSY|B_WRITE|B_AGE|B_CALL; + mbp->b_iodone = uvm_aio_biodone; + mbp->b_vp = vp; + LIST_INIT(&mbp->b_dep); + + bp = NULL; + for (offset = startoffset; + bytes > 0; + offset += iobytes, bytes -= iobytes) { + lbn = offset >> fs_bshift; + error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run); + if (error) { + UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0); + skipbytes += bytes; + bytes = 0; + break; + } + + iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, + bytes); + if (blkno == (daddr_t)-1) { + skipbytes += iobytes; + continue; + } + + /* + * Discover how much we can really pack into this buffer. + */ +#ifdef LFS_UBC_BIGBUFS + /* If no room in the current segment, finish it up */ + if (sp->sum_bytes_left < sizeof(int32_t) || + sp->seg_bytes_left < MIN(iobytes, (1 << fs->lfs_bshift))) { + int version; + + lfs_updatemeta(sp); + + version = sp->fip->fi_version; + (void) lfs_writeseg(fs, sp); + + sp->fip->fi_version = version; + sp->fip->fi_ino = ip->i_number; + /* Add the current file to the segment summary. 
*/ + ++((SEGSUM *)(sp->segsum))->ss_nfinfo; + sp->sum_bytes_left -= FINFOSIZE; + } + iobytes = MIN(iobytes, ((sp->seg_bytes_left >> fs_bshift) << fs_bshift)); +#else + iobytes = MIN(iobytes, (1 << fs_bshift)); + if (iobytes != blksize(fs, ip, lblkno(fs, offset))) { + printf("iobytes = %" PRId64 ", blk = %" PRId64 "\n", + (int64_t)iobytes, + (int64_t)blksize(fs, ip, lblkno(fs, offset))); + } + KASSERT(iobytes == blksize(fs, ip, lblkno(fs, offset))); +#endif + KASSERT(iobytes > 0); + + /* if it's really one i/o, don't make a second buf */ + if (offset == startoffset && iobytes == bytes) { + bp = mbp; + /* printf("bp is mbp\n"); */ + /* correct overcount if there is no second buffer */ + s = splbio(); + simple_lock(&global_v_numoutput_slock); + --vp->v_numoutput; + simple_unlock(&global_v_numoutput_slock); + splx(s); + } else { + /* printf("bp is not mbp\n"); */ + s = splbio(); + bp = pool_get(&bufpool, PR_WAITOK); + UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", + vp, bp, vp->v_numoutput, 0); + memset(bp, 0, sizeof(*bp)); + splx(s); + bp->b_data = (char *)kva + + (vaddr_t)(offset - pg->offset); + bp->b_resid = bp->b_bcount = iobytes; + bp->b_flags = B_BUSY|B_WRITE|B_CALL; + bp->b_iodone = uvm_aio_biodone1; + LIST_INIT(&bp->b_dep); + } + + /* XXX This is silly ... is this necessary? 
*/ + bp->b_vp = NULL; + s = splbio(); + bgetvp(vp, bp); + splx(s); + + bp->b_lblkno = lblkno(fs, offset); + bp->b_private = mbp; + if (devvp->v_type == VBLK) { + bp->b_dev = devvp->v_rdev; + } + VOP_BWRITE(bp); + while(lfs_gatherblock(sp, bp, NULL)) + ; + } + + if (skipbytes) { + UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0); + s = splbio(); + if (error) { + mbp->b_flags |= B_ERROR; + mbp->b_error = error; + } + mbp->b_resid -= skipbytes; + if (mbp->b_resid == 0) { + biodone(mbp); + } + splx(s); + } + UVMHIST_LOG(ubchist, "returning 0", 0,0,0,0); + return (0); +} +#endif /* LFS_UBC */ diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c index 7493d8606eb4..6eb70d2a9edf 100644 --- a/sys/ufs/lfs/lfs_vnops.c +++ b/sys/ufs/lfs/lfs_vnops.c @@ -1,7 +1,7 @@ -/* $NetBSD: lfs_vnops.c,v 1.83 2003/02/03 00:32:35 perseant Exp $ */ +/* $NetBSD: lfs_vnops.c,v 1.84 2003/02/17 23:48:22 perseant Exp $ */ /*- - * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.83 2003/02/03 00:32:35 perseant Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.84 2003/02/17 23:48:22 perseant Exp $"); #include #include @@ -97,9 +97,19 @@ __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.83 2003/02/03 00:32:35 perseant Exp #include #include +#include +#ifdef LFS_UBC +# include +# include +# include +#endif + #include #include +extern int lfs_writer_daemon; +extern int lfs_subsys_pages; + /* Global vfs data structures for lfs. 
*/ int (**lfs_vnodeop_p)(void *); const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { @@ -121,7 +131,11 @@ const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { { &vop_poll_desc, ufs_poll }, /* poll */ { &vop_kqfilter_desc, genfs_kqfilter }, /* kqfilter */ { &vop_revoke_desc, ufs_revoke }, /* revoke */ +#ifdef LFS_UBC + { &vop_mmap_desc, lfs_mmap }, /* mmap */ +#else { &vop_mmap_desc, ufs_mmap }, /* mmap */ +#endif { &vop_fsync_desc, lfs_fsync }, /* fsync */ { &vop_seek_desc, ufs_seek }, /* seek */ { &vop_remove_desc, lfs_remove }, /* remove */ @@ -150,7 +164,11 @@ const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = { { &vop_truncate_desc, lfs_truncate }, /* truncate */ { &vop_update_desc, lfs_update }, /* update */ { &vop_bwrite_desc, lfs_bwrite }, /* bwrite */ +#ifdef LFS_UBC + { &vop_getpages_desc, genfs_getpages }, /* getpages */ +#else { &vop_getpages_desc, lfs_getpages }, /* getpages */ +#endif { &vop_putpages_desc, lfs_putpages }, /* putpages */ { NULL, NULL } }; @@ -293,37 +311,46 @@ lfs_fsync(void *v) struct proc *a_p; } */ *ap = v; struct vnode *vp = ap->a_vp; - int error; - - /* Ignore the trickle syncer */ - if (ap->a_flags & FSYNC_LAZY) + int error, wait; + + /* + * Trickle sync checks for need to do a checkpoint after possible + * activity from the pagedaemon. + */ + if (ap->a_flags & FSYNC_LAZY) { + wakeup(&lfs_writer_daemon); return 0; - - simple_lock(&vp->v_interlock); - error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), - round_page(ap->a_offhi), PGO_CLEANIT | PGO_SYNCIO); - if (error) - return error; - error = VOP_UPDATE(vp, NULL, NULL, - (ap->a_flags & FSYNC_WAIT) != 0 ? UPDATE_WAIT : 0); -#ifdef DEBUG - /* - * If we were called from vinvalbuf and lfs_update - * didn't flush all our buffers, we're in trouble. 
- */ - if ((ap->a_flags & FSYNC_WAIT) && LIST_FIRST(&vp->v_dirtyblkhd) != NULL) { - struct buf *bp; - - bp = LIST_FIRST(&vp->v_dirtyblkhd); - printf("lfs_fsync: ino %d failed to sync", VTOI(vp)->i_number); - printf("lfs_fsync: iocount = %d\n", VTOI(vp)->i_lfs->lfs_iocount); - printf("lfs_fsync: flags are 0x%x, numoutput=%d\n", - VTOI(vp)->i_flag, vp->v_numoutput); - printf("lfs_fsync: writecount=%ld\n", vp->v_writecount); - printf("lfs_fsync: first bp: %p, flags=0x%lx, lbn=%" PRId64 "\n", - bp, bp->b_flags, bp->b_lblkno); } + + wait = (ap->a_flags & FSYNC_WAIT); + do { +#ifdef DEBUG + struct buf *bp; #endif + + simple_lock(&vp->v_interlock); + error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), + round_page(ap->a_offhi), + PGO_CLEANIT | (wait ? PGO_SYNCIO : 0)); + if (error) + return error; + error = VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0); + if (wait && error == 0 && !VPISEMPTY(vp)) { +#ifdef DEBUG + printf("lfs_fsync: reflushing ino %d\n", + VTOI(vp)->i_number); + printf("vflags %x iflags %x npages %d\n", + vp->v_flag, VTOI(vp)->i_flag, + vp->v_uobj.uo_npages); + LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) + printf("%" PRId64 " (%lx)", bp->b_lblkno, + bp->b_flags); + printf("\n"); +#endif + VTOI(vp)->i_flag |= IN_MODIFIED; + } + } while (wait && error == 0 && !VPISEMPTY(vp)); + return error; } @@ -361,6 +388,7 @@ lfs_inactive(void *v) #define SET_DIROP2(vp, vp2) lfs_set_dirop((vp), (vp2)) static int lfs_set_dirop(struct vnode *, struct vnode *); extern int lfs_dirvcount; +extern int lfs_do_flush; #define NRESERVE(fs) (btofsb(fs, (NIADDR + 3 + (2 * NIADDR + 3)) << fs->lfs_bshift)) @@ -383,17 +411,15 @@ lfs_set_dirop(struct vnode *vp, struct vnode *vp2) if (fs->lfs_dirops == 0) lfs_check(vp, LFS_UNUSED_LBN, 0); - while (fs->lfs_writer || lfs_dirvcount > LFS_MAXDIROP) { + while (fs->lfs_writer || lfs_dirvcount > LFS_MAX_DIROP) { if (fs->lfs_writer) tsleep(&fs->lfs_dirops, PRIBIO + 1, "lfs_sdirop", 0); - if (lfs_dirvcount > LFS_MAXDIROP && fs->lfs_dirops 
== 0) { - ++fs->lfs_writer; - lfs_flush(fs, 0); - if (--fs->lfs_writer == 0) - wakeup(&fs->lfs_dirops); + if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) { + wakeup(&lfs_writer_daemon); + preempt(NULL); } - if (lfs_dirvcount > LFS_MAXDIROP) { + if (lfs_dirvcount > LFS_MAX_DIROP) { #ifdef DEBUG_LFS printf("lfs_set_dirop: sleeping with dirops=%d, " "dirvcount=%d\n", fs->lfs_dirops, @@ -438,15 +464,19 @@ unreserve: } #define MARK_VNODE(dvp) do { \ + struct inode *_ip = VTOI(dvp); \ + struct lfs *_fs = _ip->i_lfs; \ + \ if (!((dvp)->v_flag & VDIROP)) { \ (void)lfs_vref(dvp); \ ++lfs_dirvcount; \ + TAILQ_INSERT_TAIL(&_fs->lfs_dchainhd, _ip, i_lfs_dchain); \ } \ (dvp)->v_flag |= VDIROP; \ - if (!(VTOI(dvp)->i_flag & IN_ADIROP)) { \ - ++VTOI(dvp)->i_lfs->lfs_nadirop; \ + if (!(_ip->i_flag & IN_ADIROP)) { \ + ++_fs->lfs_nadirop; \ } \ - VTOI(dvp)->i_flag |= IN_ADIROP; \ + _ip->i_flag |= IN_ADIROP; \ } while (0) #define UNMARK_VNODE(vp) lfs_unmark_vnode(vp) @@ -656,22 +686,24 @@ lfs_rmdir(void *v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; + struct vnode *vp; int error; + vp = ap->a_vp; if ((error = SET_DIROP2(ap->a_dvp, ap->a_vp)) != 0) { vrele(ap->a_dvp); if (ap->a_vp != ap->a_dvp) VOP_UNLOCK(ap->a_dvp, 0); - vput(ap->a_vp); + vput(vp); return error; } MARK_VNODE(ap->a_dvp); - MARK_VNODE(ap->a_vp); + MARK_VNODE(vp); error = ufs_rmdir(ap); UNMARK_VNODE(ap->a_dvp); - UNMARK_VNODE(ap->a_vp); + UNMARK_VNODE(vp); - SET_ENDOP2(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, ap->a_vp, "rmdir"); + SET_ENDOP2(VTOI(ap->a_dvp)->i_lfs, ap->a_dvp, vp, "rmdir"); return (error); } @@ -844,7 +876,7 @@ lfs_getattr(void *v) vap->va_blocksize = MAXBSIZE; else vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; - vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_ffs_blocks); + vap->va_bytes = fsbtob(fs, (u_quad_t)ip->i_lfs_effnblks); vap->va_type = vp->v_type; vap->va_filerev = ip->i_modrev; return (0); @@ -964,18 +996,22 @@ lfs_reclaim(void *v) struct proc *a_p; } */ *ap = v; 
struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); int error; - KASSERT(VTOI(vp)->i_ffs_nlink == VTOI(vp)->i_ffs_effnlink); + KASSERT(ip->i_ffs_nlink == ip->i_ffs_effnlink); - LFS_CLR_UINO(VTOI(vp), IN_ALLMOD); + LFS_CLR_UINO(ip, IN_ALLMOD); if ((error = ufs_reclaim(vp, ap->a_p))) return (error); + pool_put(&lfs_inoext_pool, ip->inode_ext.lfs); + ip->inode_ext.lfs = NULL; pool_put(&lfs_inode_pool, vp->v_data); vp->v_data = NULL; return (0); } +#ifndef LFS_UBC int lfs_getpages(void *v) { @@ -1004,3 +1040,645 @@ lfs_putpages(void *v) error = genfs_putpages(v); return error; } + +#else /* LFS_UBC */ + +/* + * Make sure that for all pages in every block in the given range, + * either all are dirty or all are clean. If any of the pages + * we've seen so far are dirty, put the vnode on the paging chain, + * and mark it IN_PAGING. + */ +static int +check_dirty(struct lfs *fs, struct vnode *vp, + off_t startoffset, off_t endoffset, off_t blkeof, + int flags) +{ + int by_list; + struct vm_page *curpg, *pgs[MAXBSIZE / PAGE_SIZE], *pg; + struct lwp *l = curlwp ? curlwp : &lwp0; + off_t soff; + voff_t off; + int i, dirty, tdirty, nonexistent, any_dirty; + int pages_per_block = fs->lfs_bsize >> PAGE_SHIFT; + + top: + by_list = (vp->v_uobj.uo_npages <= + ((endoffset - startoffset) >> PAGE_SHIFT) * + UVM_PAGE_HASH_PENALTY); + any_dirty = 0; + + if (by_list) { + curpg = TAILQ_FIRST(&vp->v_uobj.memq); + PHOLD(l); + } else { + soff = startoffset; + } + while (by_list || soff < MIN(blkeof, endoffset)) { + if (by_list) { + if (pages_per_block > 1) { + while (curpg && (curpg->offset & fs->lfs_bmask)) + curpg = TAILQ_NEXT(curpg, listq); + } + if (curpg == NULL) + break; + soff = curpg->offset; + } + + /* + * Mark all pages in extended range busy; find out if any + * of them are dirty. 
+ */ + nonexistent = dirty = 0; + for (i = 0; i == 0 || i < pages_per_block; i++) { + if (by_list && pages_per_block <= 1) { + pgs[i] = pg = curpg; + } else { + off = soff + (i << PAGE_SHIFT); + pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); + if (pg == NULL) { + ++nonexistent; + continue; + } + } + KASSERT(pg != NULL); + while (pg->flags & PG_BUSY) { + pg->flags |= PG_WANTED; + UVM_UNLOCK_AND_WAIT(pg, &vp->v_interlock, 0, + "lfsput", 0); + simple_lock(&vp->v_interlock); + if (by_list) + goto top; + } + pg->flags |= PG_BUSY; + UVM_PAGE_OWN(pg, "lfs_putpages"); + + pmap_page_protect(pg, VM_PROT_NONE); + tdirty = (pmap_clear_modify(pg) || + (pg->flags & PG_CLEAN) == 0); + dirty += tdirty; + } + if (pages_per_block > 0 && nonexistent >= pages_per_block) { + if (by_list) { + curpg = TAILQ_NEXT(curpg, listq); + } else { + soff += fs->lfs_bsize; + } + continue; + } + + any_dirty += dirty; + KASSERT(nonexistent == 0); + + /* + * If any are dirty make all dirty; unbusy them, + * but if we were asked to clean, take them off + * of their queue so the pagedaemon doesn't bother + * us about them while they're on their way to disk. + * + * (XXXUBC the page is now on *no* page queue.) 
+ */ + for (i = 0; i == 0 || i < pages_per_block; i++) { + pg = pgs[i]; + KASSERT(!((pg->flags & PG_CLEAN) && (pg->flags & PG_DELWRI))); + if (dirty) { + pg->flags &= ~PG_CLEAN; + if (flags & PGO_FREE) { + /* XXXUBC need better way to update */ + lfs_subsys_pages += MIN(1, pages_per_block); + uvm_lock_pageq(); + UVM_PAGE_OWN(pg, NULL); + uvm_pagedequeue(pg); + /* Suspended write flag */ + pg->flags |= PG_DELWRI; + uvm_unlock_pageq(); + } + } else { + UVM_PAGE_OWN(pg, NULL); + } + if (pg->flags & PG_WANTED) + wakeup(pg); + pg->flags &= ~(PG_WANTED|PG_BUSY); + /* UVM_PAGE_OWN(pg, NULL); */ + } + + if (by_list) { + curpg = TAILQ_NEXT(curpg, listq); + } else { + soff += MAX(PAGE_SIZE, fs->lfs_bsize); + } + } + if (by_list) { + PRELE(l); + } + + /* + * If any pages were dirty, mark this inode as "pageout requested", + * and put it on the paging queue. + * XXXUBC locking (check locking on dchainhd too) + */ +#ifdef notyet + if (any_dirty) { + if (!(ip->i_flags & IN_PAGING)) { + ip->i_flags |= IN_PAGING; + TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); + } + } +#endif + return any_dirty; +} + +/* + * lfs_putpages functions like genfs_putpages except that + * + * (1) It needs to bounds-check the incoming requests to ensure that + * they are block-aligned; if they are not, expand the range and + * do the right thing in case, e.g., the requested range is clean + * but the expanded range is dirty. + * (2) It needs to explicitly send blocks to be written when it is done. + * VOP_PUTPAGES is not ever called with the seglock held, so + * we simply take the seglock and let lfs_segunlock wait for us. + * XXX Actually we can be called with the seglock held, if we have + * XXX to flush a vnode while lfs_markv is in operation. As of this + * XXX writing we panic in this case. + * + * Assumptions: + * + * (1) The caller does not hold any pages in this vnode busy. If it does, + * there is a danger that when we expand the page range and busy the + * pages we will deadlock. 
+ * (2) We are called with vp->v_interlock held; we must return with it + * released. + * (3) We don't absolutely have to free pages right away, provided that + * the request does not have PGO_SYNCIO. When the pagedaemon gives + * us a request with PGO_FREE, we take the pages out of the paging + * queue and wake up the writer, which will handle freeing them for us. + * + * We ensure that for any filesystem block, all pages for that + * block are either resident or not, even if those pages are higher + * than EOF; that means that we will be getting requests to free + * "unused" pages above EOF all the time, and should ignore them. + */ + +int +lfs_putpages(void *v) +{ + int error; + struct vop_putpages_args /* { + struct vnode *a_vp; + voff_t a_offlo; + voff_t a_offhi; + int a_flags; + } */ *ap = v; + struct vnode *vp; + struct inode *ip; + struct lfs *fs; + struct segment *sp; + off_t origoffset, startoffset, endoffset, origendoffset, blkeof; + off_t max_endoffset; + int pages_per_block; + int s, sync, dirty, pagedaemon; + UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_lfs; + sync = (ap->a_flags & PGO_SYNCIO); + pagedaemon = (curproc == uvm.pagedaemon_proc); + + /* Putpages does nothing for metadata. */ + if (vp == fs->lfs_ivnode || vp->v_type != VREG) { + simple_unlock(&vp->v_interlock); + return 0; + } + + /* + * If there are no pages, don't do anything. + */ + if (vp->v_uobj.uo_npages == 0) { + s = splbio(); + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + (vp->v_flag & VONWORKLST)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } + splx(s); + simple_unlock(&vp->v_interlock); + return 0; + } + + blkeof = blkroundup(fs, ip->i_ffs_size); + + /* + * Ignore requests to free pages past EOF but in the same block + * as EOF, unless the request is synchronous. (XXX why sync?) + * XXXUBC Make these pages look "active" so the pagedaemon won't + * XXXUBC bother us with them again. 
+ */ + if (!sync && ap->a_offlo >= ip->i_ffs_size && ap->a_offlo < blkeof) { + origoffset = ap->a_offlo; + ap->a_offlo = blkeof; + if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { + simple_unlock(&vp->v_interlock); + return 0; + } + } + + /* + * Extend page range to start and end at block boundaries. + * (For the purposes of VOP_PUTPAGES, fragments don't exist.) + */ + pages_per_block = fs->lfs_bsize >> PAGE_SHIFT; + origoffset = ap->a_offlo; + origendoffset = ap->a_offhi; + startoffset = origoffset & ~(fs->lfs_bmask); + max_endoffset = (trunc_page(LLONG_MAX) >> fs->lfs_bshift) + << fs->lfs_bshift; + + if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { + endoffset = max_endoffset; + origendoffset = endoffset; + } else { + origendoffset = round_page(ap->a_offhi); + endoffset = round_page(blkroundup(fs, origendoffset)); + } + + KASSERT(startoffset > 0 || endoffset >= startoffset); + if (startoffset == endoffset) { + /* Nothing to do, why were we called? */ + simple_unlock(&vp->v_interlock); +#ifdef DEBUG + printf("lfs_putpages: startoffset = endoffset = %" PRId64 "\n", + startoffset); +#endif + return 0; + } + + ap->a_offlo = startoffset; + ap->a_offhi = endoffset; + + if (!(ap->a_flags & PGO_CLEANIT)) + return genfs_putpages(v); + + /* + * Make sure that all pages in any given block are dirty, or + * none of them are. Find out if any of the pages we've been + * asked about are dirty. If none are dirty, send them on + * through genfs_putpages(), albeit with adjusted offsets. + * XXXUBC I am assuming here that they can't be dirtied in + * XXXUBC the meantime, but I bet that's wrong. + */ + dirty = check_dirty(fs, vp, startoffset, endoffset, blkeof, ap->a_flags); + if (!dirty) + return genfs_putpages(v); + + /* + * Dirty and asked to clean. + * + * Pagedaemon can't actually write LFS pages; wake up + * the writer to take care of that. The writer will + * notice the pager inode queue and act on that. 
+ */ + if (pagedaemon) { + ++fs->lfs_pdflush; + wakeup(&lfs_writer_daemon); + return EWOULDBLOCK; + } + + /* + * If this is a file created in a recent dirop, we can't flush its + * inode until the dirop is complete. Drain dirops, then flush the + * filesystem (taking care of any other pending dirops while we're + * at it). + */ + if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT && + (vp->v_flag & VDIROP)) { + int locked; + + /* printf("putpages to clean VDIROP, flushing\n"); */ + while (fs->lfs_dirops > 0) { + ++fs->lfs_diropwait; + tsleep(&fs->lfs_writer, PRIBIO+1, "ppdirop", 0); + --fs->lfs_diropwait; + } + ++fs->lfs_writer; + locked = VOP_ISLOCKED(vp) && /* XXX */ + vp->v_lock.lk_lockholder == curproc->p_pid; + if (locked) + VOP_UNLOCK(vp, 0); + simple_unlock(&vp->v_interlock); + + lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); + + simple_lock(&vp->v_interlock); + if (locked) + VOP_LOCK(vp, LK_EXCLUSIVE); + if (--fs->lfs_writer == 0) + wakeup(&fs->lfs_dirops); + + /* XXX the flush should have taken care of this one too! */ + } + + + /* + * This is it. We are going to write some pages. From here on + * down it's all just mechanics. + * + * If there are more than one page per block, we don't want to get + * caught locking them backwards; so set PGO_BUSYFAIL to avoid + * deadlocks. Also, don't let genfs_putpages wait; + * lfs_segunlock will wait for us, if need be. + */ + ap->a_flags &= ~PGO_SYNCIO; + if (pages_per_block > 1) + ap->a_flags |= PGO_BUSYFAIL; + + /* + * If we've already got the seglock, flush the node and return. + * The FIP has already been set up for us by lfs_writefile, + * and FIP cleanup and lfs_updatemeta will also be done there, + * unless genfs_putpages returns EDEADLK; then we must flush + * what we have, and correct FIP and segment header accounting. + */ + if (ap->a_flags & PGO_LOCKED) { + sp = fs->lfs_sp; + sp->vp = vp; + + /* + * XXXUBC + * There is some danger here that we might run out of + * buffers if we flush too much at once. 
If the number + * of dirty buffers is too great, we should cut the range + * down and write in chunks. + */ + while ((error = genfs_putpages(v)) == EDEADLK) { +#ifdef DEBUG_LFS + printf("lfs_putpages: genfs_putpages returned EDEADLK" + " ino %d off %x (seg %d)\n", + ip->i_number, fs->lfs_offset, + dtosn(fs, fs->lfs_offset)); +#endif + /* Write gathered pages */ + lfs_updatemeta(sp); + (void) lfs_writeseg(fs, sp); + + /* Reinitialize brand new FIP and add us to it */ + sp->vp = vp; + sp->fip->fi_version = ip->i_ffs_gen; + sp->fip->fi_ino = ip->i_number; + /* Add us to the new segment summary. */ + ++((SEGSUM *)(sp->segsum))->ss_nfinfo; + sp->sum_bytes_left -= + sizeof(struct finfo) - sizeof(int32_t); + + /* Give the write a chance to complete */ + simple_unlock(&vp->v_interlock); + preempt(NULL); + simple_lock(&vp->v_interlock); + } + return error; + } + + /* + * Take the seglock, because we are going to be writing pages. + */ + if ((error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0))) != 0) + return error; + + /* + * VOP_PUTPAGES should not be called while holding the seglock. + * XXX fix lfs_markv, or do this properly. + */ + KASSERT(fs->lfs_seglock == 1); + + /* + * We assume we're being called with sp->fip pointing at blank space. + * Account for a new FIP in the segment header, and set sp->vp. + * (This should duplicate the setup at the top of lfs_writefile().) + */ + sp = fs->lfs_sp; + if (sp->seg_bytes_left < fs->lfs_bsize || + sp->sum_bytes_left < sizeof(struct finfo)) + (void) lfs_writeseg(fs, fs->lfs_sp); + + sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(int32_t); + ++((SEGSUM *)(sp->segsum))->ss_nfinfo; + sp->vp = vp; + + if (vp->v_flag & VDIROP) + ((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT); + + sp->fip->fi_nblocks = 0; + sp->fip->fi_ino = ip->i_number; + sp->fip->fi_version = ip->i_ffs_gen; + + /* + * Loop through genfs_putpages until all pages are gathered. 
+ */ + /* + * There is some danger here that we might run out of + * buffers if we flush too much at once. If the number + * of dirty buffers is too great, then, cut the range down + * and write in chunks. + * + * XXXUBC this assumes a uniform dirtying of the pages + * XXXUBC across the address space + * XXXXXX do this + */ + while ((error = genfs_putpages(v)) == EDEADLK) { +#ifdef DEBUG_LFS + printf("lfs_putpages: genfs_putpages returned EDEADLK [2]" + " ino %d off %x (seg %d)\n", + ip->i_number, fs->lfs_offset, + dtosn(fs, fs->lfs_offset)); +#endif + /* Write gathered pages */ + lfs_updatemeta(sp); + (void) lfs_writeseg(fs, sp); + + /* + * Reinitialize brand new FIP and add us to it. + * (This should duplicate the fixup in lfs_gatherpages().) + */ + sp->vp = vp; + sp->fip->fi_version = ip->i_ffs_gen; + sp->fip->fi_ino = ip->i_number; + /* Add us to the new segment summary. */ + ++((SEGSUM *)(sp->segsum))->ss_nfinfo; + sp->sum_bytes_left -= + sizeof(struct finfo) - sizeof(int32_t); + + /* Give the write a chance to complete */ + simple_unlock(&vp->v_interlock); + preempt(NULL); + simple_lock(&vp->v_interlock); + } + + /* + * Blocks are now gathered into a segment waiting to be written. + * All that's left to do is update metadata, and write them. + */ + lfs_updatemeta(fs->lfs_sp); + fs->lfs_sp->vp = NULL; + lfs_writeseg(fs, fs->lfs_sp); + + /* + * Clean up FIP. + * (This should duplicate cleanup at the end of lfs_writefile().) + */ + if (sp->fip->fi_nblocks != 0) { + sp->fip = (FINFO*)((caddr_t)sp->fip + sizeof(struct finfo) + + sizeof(int32_t) * (sp->fip->fi_nblocks - 1)); + sp->start_lbp = &sp->fip->fi_blocks[0]; + } else { + sp->sum_bytes_left += sizeof(FINFO) - sizeof(int32_t); + --((SEGSUM *)(sp->segsum))->ss_nfinfo; + } + /* + * XXX - with the malloc/copy writeseg, the pages are freed by now + * even if we don't wait (e.g. if we hold a nested lock). This + * will not be true if we stop using malloc/copy. 
+ */ + KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); + lfs_segunlock(fs); + + /* + * Wait for v_numoutput to drop to zero. The seglock should + * take care of this, but there is a slight possibility that + * aiodoned might not have got around to our buffers yet. + */ + if (sync) { + int s; + + s = splbio(); + simple_lock(&global_v_numoutput_slock); + while(vp->v_numoutput > 0) { +#ifdef DEBUG + printf("ino %d sleeping on num %d\n", + ip->i_number, vp->v_numoutput); +#endif + vp->v_flag |= VBWAIT; + simple_unlock(&global_v_numoutput_slock); + tsleep(&vp->v_numoutput, PRIBIO + 1, "lfs_vn", 0); + simple_lock(&global_v_numoutput_slock); + } + simple_unlock(&global_v_numoutput_slock); + splx(s); + } + return error; +} + +/* + * Find out whether the vnode has any blocks or pages waiting to be written. + * We used to just check LIST_EMPTY(&vp->v_dirtyblkhd), but there is not + * presently as simple a mechanism for the page cache. + */ +int +lfs_checkifempty(struct vnode *vp) +{ + struct vm_page *pg; + struct buf *bp; + int r, s; + + if (vp->v_type != VREG || VTOI(vp)->i_number == LFS_IFILE_INUM) + return LIST_EMPTY(&vp->v_dirtyblkhd); + + /* + * For vnodes with pages it is a little more complex. + * Pages that have been written (i.e. are "clean" for our purposes) + * might be in seemingly dirty buffers, so we have to troll + * looking for indirect block buffers as well as pages. + */ + simple_lock(&vp->v_interlock); + s = splbio(); + for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; + bp = LIST_NEXT(bp, b_vnbufs)) { + if (bp->b_lblkno < 0) { + splx(s); + return 0; + } + } + splx(s); + + /* + * Run through the page list to find dirty pages. + * Right now I just walk the memq. 
+ */ + pg = TAILQ_FIRST(&vp->v_uobj.memq); + r = 1; + while(pg) { + if ((pg->flags & PG_CLEAN) == 0 || pmap_is_modified(pg)) { + r = 0; + break; + } + pg = TAILQ_NEXT(pg, listq); + } +#if 0 + if (r != !(vp->v_flag & VONWORKLST)) { + printf("nope, VONWORKLST isn't good enough!\n"); + } +#endif + simple_unlock(&vp->v_interlock); + return r; +} + +/* + * Return the last logical file offset that should be written for this file + * if we're doing a write that ends at "size". If writing, we need to know + * about sizes on disk, i.e. fragments if there are any; if reading, we need + * to know about entire blocks. + */ +void +lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags) +{ + struct inode *ip = VTOI(vp); + struct lfs *fs = ip->i_lfs; + daddr_t olbn, nlbn; + + KASSERT(flags & (GOP_SIZE_READ | GOP_SIZE_WRITE)); + KASSERT((flags & (GOP_SIZE_READ | GOP_SIZE_WRITE)) + != (GOP_SIZE_READ | GOP_SIZE_WRITE)); + + olbn = lblkno(fs, ip->i_ffs_size); + nlbn = lblkno(fs, size); + if ((flags & GOP_SIZE_WRITE) && nlbn < NDADDR && olbn <= nlbn) { + *eobp = fragroundup(fs, size); + } else { + *eobp = blkroundup(fs, size); + } +} + +#ifdef DEBUG +void lfs_dump_vop(void *); + +void +lfs_dump_vop(void *v) +{ + struct vop_putpages_args /* { + struct vnode *a_vp; + voff_t a_offlo; + voff_t a_offhi; + int a_flags; + } */ *ap = v; + + vfs_vnode_print(ap->a_vp, 0, printf); + lfs_dump_dinode(&VTOI(ap->a_vp)->i_din.ffs_din); +} +#endif + +int +lfs_mmap(void *v) +{ + struct vop_mmap_args /* { + const struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap = v; + + if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) + return EOPNOTSUPP; + return ufs_mmap(v); +} +#endif /* LFS_UBC */ diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index b7b8e4beba53..73c76798bec9 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $NetBSD: inode.h,v 1.32 2003/01/24 21:55:29 fvdl Exp $ */ +/* $NetBSD: 
inode.h,v 1.33 2003/02/17 23:48:23 perseant Exp $ */ /* * Copyright (c) 1982, 1989, 1993 @@ -58,11 +58,7 @@ struct ext2fs_inode_ext { daddr_t ext2fs_last_blk; /* last block allocated on disk */ }; -struct lfs_inode_ext { - off_t lfs_osize; /* size of file on disk */ - u_int32_t lfs_effnblocks; /* number of blocks when i/o completes */ - size_t lfs_fragsize[NDADDR]; /* size of on-disk direct blocks */ -}; +struct lfs_inode_ext; /* * The inode is used to describe each active (or recently active) file in the @@ -111,13 +107,10 @@ struct inode { union { /* Other extensions could go here... */ struct ext2fs_inode_ext e2fs; - struct lfs_inode_ext lfs; + struct lfs_inode_ext *lfs; } inode_ext; #define i_e2fs_last_lblk inode_ext.e2fs.ext2fs_last_lblk #define i_e2fs_last_blk inode_ext.e2fs.ext2fs_last_blk -#define i_lfs_effnblks inode_ext.lfs.lfs_effnblocks -#define i_lfs_fragsize inode_ext.lfs.lfs_fragsize -#define i_lfs_osize inode_ext.lfs.lfs_osize /* * The on-disk dinode itself. */ @@ -179,6 +172,7 @@ struct inode { #define IN_CLEANING 0x0100 /* LFS: file is being cleaned */ #define IN_ADIROP 0x0200 /* LFS: dirop in progress */ #define IN_SPACECOUNTED 0x0400 /* Blocks to be freed in free count. */ +#define IN_PAGING 0x1000 /* LFS: file is on paging queue */ #if defined(_KERNEL) /* diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index b284a1bb4229..8a09bdc95e84 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_extern.h,v 1.30 2003/01/24 21:55:30 fvdl Exp $ */ +/* $NetBSD: ufs_extern.h,v 1.31 2003/02/17 23:48:23 perseant Exp $ */ /*- * Copyright (c) 1991, 1993, 1994 @@ -167,6 +167,7 @@ void ufs_vinit __P((struct mount *, int (**) __P((void *)), int (**) __P((void *)), struct vnode **)); int ufs_makeinode __P((int, struct vnode *, struct vnode **, struct componentname *)); +int ufs_gop_alloc __P((struct vnode *, off_t, off_t, int, struct ucred *)); /* * Soft dependency function prototypes. 
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index d5c8fc3b2613..d2b00389b975 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_inode.c,v 1.33 2002/01/26 08:32:05 chs Exp $ */ +/* $NetBSD: ufs_inode.c,v 1.34 2003/02/17 23:48:23 perseant Exp $ */ /* * Copyright (c) 1991, 1993 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.33 2002/01/26 08:32:05 chs Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.34 2003/02/17 23:48:23 perseant Exp $"); #include "opt_quota.h" @@ -192,10 +192,10 @@ ufs_balloc_range(vp, off, len, cred, flags) vp, off, len, vp->v_size); oldeof = vp->v_size; - GOP_SIZE(vp, oldeof, &oldeob); + GOP_SIZE(vp, oldeof, &oldeob, GOP_SIZE_WRITE); neweof = MAX(vp->v_size, off + len); - GOP_SIZE(vp, neweof, &neweob); + GOP_SIZE(vp, neweof, &neweob, GOP_SIZE_WRITE); error = 0; uobj = &vp->v_uobj; diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 6a2b4b8add74..caf1dc273e79 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_readwrite.c,v 1.47 2003/01/24 21:55:30 fvdl Exp $ */ +/* $NetBSD: ufs_readwrite.c,v 1.48 2003/02/17 23:48:23 perseant Exp $ */ /*- * Copyright (c) 1993 @@ -36,7 +36,7 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.47 2003/01/24 21:55:30 fvdl Exp $"); +__KERNEL_RCSID(1, "$NetBSD: ufs_readwrite.c,v 1.48 2003/02/17 23:48:23 perseant Exp $"); #ifdef LFS_READWRITE #define BLKSIZE(a, b, c) blksize(a, b, c) @@ -110,9 +110,13 @@ READ(void *v) goto out; } -#ifndef LFS_READWRITE +#ifdef LFS_READWRITE +# ifdef LFS_UBC + usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM); +# endif +#else /* !LFS_READWRITE */ usepc = vp->v_type == VREG; -#endif +#endif /* !LFS_READWRITE */ if (usepc) { while (uio->uio_resid > 0) { bytelen = MIN(ip->i_ffs_size - uio->uio_offset, @@ -278,9 +282,14 @@ WRITE(void *v) bsize = fs->fs_bsize; error = 0; -#ifndef 
LFS_READWRITE +#ifdef LFS_READWRITE +# ifdef LFS_UBC + async = TRUE; usepc = vp->v_type == VREG; -#endif +# endif +#else /* !LFS_READWRITE */ + usepc = vp->v_type == VREG; +#endif /* !LFS_READWRITE */ if (!usepc) { goto bcache; } diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 61db4a63ec4d..cee935954a01 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_vnops.c,v 1.89 2002/12/31 15:00:18 yamt Exp $ */ +/* $NetBSD: ufs_vnops.c,v 1.90 2003/02/17 23:48:23 perseant Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.89 2002/12/31 15:00:18 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.90 2003/02/17 23:48:23 perseant Exp $"); #include "opt_quota.h" #include "fs_lfs.h" @@ -73,6 +73,8 @@ __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.89 2002/12/31 15:00:18 yamt Exp $"); #include #include +#include + static int ufs_chmod(struct vnode *, int, struct ucred *, struct proc *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct proc *); @@ -2071,3 +2073,49 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, vput(tvp); return (error); } + +/* + * Allocate len bytes at offset off. + */ +int +ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, + struct ucred *cred) +{ + struct inode *ip = VTOI(vp); + int error, delta, bshift, bsize; + UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist); + + error = 0; + bshift = vp->v_mount->mnt_fs_bshift; + bsize = 1 << bshift; + + delta = off & (bsize - 1); + off -= delta; + len += delta; + + while (len > 0) { + bsize = MIN(bsize, len); + + error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL); + if (error) { + goto out; + } + + /* + * increase file size now, VOP_BALLOC() requires that + * EOF be up-to-date before each call. 
+ */ + + if (ip->i_ffs_size < off + bsize) { + UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x", + vp, ip->i_ffs_size, off + bsize, 0); + ip->i_ffs_size = off + bsize; + } + + off += bsize; + len -= bsize; + } + +out: + return error; +} diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index 37813da079ab..6afeee50fc6c 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.c,v 1.83 2003/02/01 06:23:55 thorpej Exp $ */ +/* $NetBSD: uvm_page.c,v 1.84 2003/02/17 23:48:24 perseant Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -71,7 +71,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.83 2003/02/01 06:23:55 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.84 2003/02/17 23:48:24 perseant Exp $"); #include "opt_uvmhist.h" @@ -1254,7 +1254,7 @@ uvm_pagefree(pg) if (pg->flags & PG_WANTED) { wakeup(pg); } - pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED); + pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1); #ifdef UVM_PAGE_TRKOWN pg->owner_tag = NULL; #endif diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h index b5e81a0e1fec..ea9352c5a472 100644 --- a/sys/uvm/uvm_pager.h +++ b/sys/uvm/uvm_pager.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pager.h,v 1.25 2002/03/25 02:08:10 chs Exp $ */ +/* $NetBSD: uvm_pager.h,v 1.26 2003/02/17 23:48:24 perseant Exp $ */ /* * @@ -124,6 +124,7 @@ struct uvm_pagerops { #define PGO_ALLPAGES 0x010 /* flush whole object/get all pages */ #define PGO_LOCKED 0x040 /* fault data structures are locked [get] */ +#define PGO_BUSYFAIL 0x080 /* fail if a page is busy [put] */ #define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */ #define PGO_PASTEOF 0x400 /* allow allocation of pages past EOF */ diff --git a/usr.sbin/dumplfs/dumplfs.c b/usr.sbin/dumplfs/dumplfs.c index c79e17622ef9..d071776f08b5 100644 --- a/usr.sbin/dumplfs/dumplfs.c +++ b/usr.sbin/dumplfs/dumplfs.c @@ -1,4 +1,4 @@ -/* $NetBSD: dumplfs.c,v 1.21 2003/01/28 07:44:54 mrg 
Exp $ */ +/* $NetBSD: dumplfs.c,v 1.22 2003/02/17 23:48:25 perseant Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -45,7 +45,7 @@ __COPYRIGHT( #if 0 static char sccsid[] = "@(#)dumplfs.c 8.5 (Berkeley) 5/24/95"; #else -__RCSID("$NetBSD: dumplfs.c,v 1.21 2003/01/28 07:44:54 mrg Exp $"); +__RCSID("$NetBSD: dumplfs.c,v 1.22 2003/02/17 23:48:25 perseant Exp $"); #endif #endif /* not lint */ @@ -678,7 +678,7 @@ dump_super(struct lfs *lfsp) (void)printf(" Checkpoint Info\n"); (void)printf(" %s%-10d %s0x%-8x %s%-10d\n", - "free ", lfsp->lfs_free, + "freehd ", lfsp->lfs_freehd, "idaddr ", lfsp->lfs_idaddr, "ifile ", lfsp->lfs_ifile); (void)printf(" %s%-10d %s%-10d %s%-10d\n", diff --git a/usr.sbin/quotaon/quotaon.c b/usr.sbin/quotaon/quotaon.c index 576c7fece0e2..0626fb2dbfe6 100644 --- a/usr.sbin/quotaon/quotaon.c +++ b/usr.sbin/quotaon/quotaon.c @@ -1,4 +1,4 @@ -/* $NetBSD: quotaon.c,v 1.17 2002/07/20 08:40:20 grant Exp $ */ +/* $NetBSD: quotaon.c,v 1.18 2003/02/17 23:48:25 perseant Exp $ */ /* * Copyright (c) 1980, 1990, 1993 @@ -46,7 +46,7 @@ __COPYRIGHT("@(#) Copyright (c) 1980, 1990, 1993\n\ #if 0 static char sccsid[] = "@(#)quotaon.c 8.1 (Berkeley) 6/6/93"; #else -__RCSID("$NetBSD: quotaon.c,v 1.17 2002/07/20 08:40:20 grant Exp $"); +__RCSID("$NetBSD: quotaon.c,v 1.18 2003/02/17 23:48:25 perseant Exp $"); #endif #endif /* not lint */ @@ -128,7 +128,8 @@ main(argc, argv) } setfsent(); while ((fs = getfsent()) != NULL) { - if (strcmp(fs->fs_vfstype, "ffs") || + if ((strcmp(fs->fs_vfstype, "ffs") && + strcmp(fs->fs_vfstype, "lfs")) || strcmp(fs->fs_type, FSTAB_RW)) continue; if (aflag) {