diff --git a/sys/kern/vfs_wapbl.c b/sys/kern/vfs_wapbl.c
index 6f8d27cab4d8..e232bc9ff1e8 100644
--- a/sys/kern/vfs_wapbl.c
+++ b/sys/kern/vfs_wapbl.c
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_wapbl.c,v 1.22 2009/02/18 13:22:10 yamt Exp $ */
+/* $NetBSD: vfs_wapbl.c,v 1.23 2009/02/22 20:10:25 ad Exp $ */
 
 /*-
- * Copyright (c) 2003,2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2003, 2008, 2009 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -36,7 +36,7 @@
 #define WAPBL_INTERNAL
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.22 2009/02/18 13:22:10 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_wapbl.c,v 1.23 2009/02/22 20:10:25 ad Exp $");
 
 #include
 
@@ -770,27 +770,9 @@ wapbl_begin(struct wapbl *wl, const char *file, int line)
 {
 	int doflush;
 	unsigned lockcount;
-	krw_t op;
 
 	KDASSERT(wl);
 
-/*
- * XXX: The original code calls for the use of a RW_READER lock
- * here, but it turns out there are performance issues with high
- * metadata-rate workloads (e.g. multiple simultaneous tar
- * extractions). For now, we force the lock to be RW_WRITER,
- * since that currently has the best performance characteristics
- * (even for a single tar-file extraction).
- *
- */
-#define WAPBL_DEBUG_SERIALIZE 1
-
-#ifdef WAPBL_DEBUG_SERIALIZE
-	op = RW_WRITER;
-#else
-	op = RW_READER;
-#endif
-
 	/*
 	 * XXX this needs to be made much more sophisticated.
 	 * perhaps each wapbl_begin could reserve a specified
@@ -820,12 +802,12 @@ wapbl_begin(struct wapbl *wl, const char *file, int line)
 		return error;
 	}
 
-	rw_enter(&wl->wl_rwlock, op);
+	rw_enter(&wl->wl_rwlock, RW_READER);
 	mutex_enter(&wl->wl_mtx);
 	wl->wl_lock_count++;
 	mutex_exit(&wl->wl_mtx);
 
-#if defined(WAPBL_DEBUG_PRINT) && defined(WAPBL_DEBUG_SERIALIZE)
+#if defined(WAPBL_DEBUG_PRINT)
 	WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
 	    ("wapbl_begin thread %d.%d with bufcount=%zu "
 	    "bufbytes=%zu bcount=%zu at %s:%d\n",
@@ -840,7 +822,7 @@ void
 wapbl_end(struct wapbl *wl)
 {
 
-#if defined(WAPBL_DEBUG_PRINT) && defined(WAPBL_DEBUG_SERIALIZE)
+#if defined(WAPBL_DEBUG_PRINT)
 	WAPBL_PRINTF(WAPBL_PRINT_TRANSACTION,
 	    ("wapbl_end thread %d.%d with bufcount=%zu "
 	    "bufbytes=%zu bcount=%zu\n",
@@ -1552,20 +1534,14 @@ void
 wapbl_jlock_assert(struct wapbl *wl)
 {
 
-#ifdef WAPBL_DEBUG_SERIALIZE
-	KASSERT(rw_write_held(&wl->wl_rwlock));
-#else
-	KASSERT(rw_read_held(&wl->wl_rwlock) || rw_write_held(&wl->wl_rwlock));
-#endif
+	KASSERT(rw_lock_held(&wl->wl_rwlock));
 }
 
 void
 wapbl_junlock_assert(struct wapbl *wl)
 {
 
-#ifdef WAPBL_DEBUG_SERIALIZE
 	KASSERT(!rw_write_held(&wl->wl_rwlock));
-#endif
 }
 
 /****************************************************************/
diff --git a/sys/miscfs/syncfs/sync_subr.c b/sys/miscfs/syncfs/sync_subr.c
index f4ea4d64d027..e2d5f860a772 100644
--- a/sys/miscfs/syncfs/sync_subr.c
+++ b/sys/miscfs/syncfs/sync_subr.c
@@ -1,4 +1,33 @@
-/* $NetBSD: sync_subr.c,v 1.35 2009/01/17 07:02:35 yamt Exp $ */
+/* $NetBSD: sync_subr.c,v 1.36 2009/02/22 20:10:25 ad Exp $ */
+
+/*-
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved.
@@ -32,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sync_subr.c,v 1.35 2009/01/17 07:02:35 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sync_subr.c,v 1.36 2009/02/22 20:10:25 ad Exp $");
 
 #include
 #include
@@ -59,6 +88,7 @@ time_t syncdelay = 30;	/* max time to delay syncing data */
 time_t filedelay = 30;	/* time to delay syncing files */
 time_t dirdelay = 15;	/* time to delay syncing directories */
 time_t metadelay = 10;	/* time to delay syncing metadata */
+time_t lockdelay = 1;	/* time to delay if locking fails */
 
 kmutex_t syncer_mutex;	/* used to freeze syncer, long term */
 static kmutex_t syncer_data_lock; /* short term lock on data structures */
@@ -196,6 +226,7 @@ sched_sync(void *v)
 	struct synclist *slp;
 	struct vnode *vp;
 	long starttime;
+	bool synced;
 
 	updateproc = curlwp;
 
@@ -206,8 +237,7 @@ sched_sync(void *v)
 		starttime = time_second;
 
 		/*
-		 * Push files whose dirty time has expired. Be careful
-		 * of interrupt race on slp queue.
+		 * Push files whose dirty time has expired.
 		 */
 		slp = &syncer_workitem_pending[syncer_delayno];
 		syncer_delayno += 1;
@@ -216,10 +246,12 @@ sched_sync(void *v)
 
 		while ((vp = TAILQ_FIRST(slp)) != NULL) {
 			/* We are locking in the wrong direction. */
+			synced = false;
 			if (mutex_tryenter(&vp->v_interlock)) {
 				mutex_exit(&syncer_data_lock);
 				if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT |
 				    LK_INTERLOCK) == 0) {
+					synced = true;
 					(void) VOP_FSYNC(vp, curlwp->l_cred,
 					    FSYNC_LAZY, 0, 0);
 					vput(vp);
@@ -227,15 +259,36 @@ sched_sync(void *v)
 				mutex_enter(&syncer_data_lock);
 			}
 
-			/* XXXAD The vnode may have been recycled. */
+			/*
+			 * XXX The vnode may have been recycled, in which
+			 * case it may have a new identity.
+			 */
 			if (TAILQ_FIRST(slp) == vp) {
 				/*
 				 * Put us back on the worklist. The worklist
 				 * routine will remove us from our current
 				 * position and then add us back in at a later
 				 * position.
+				 *
+				 * Try again sooner rather than later if
+				 * we were unable to lock the vnode. Lock
+				 * failure should not prevent us from doing
+				 * the sync "soon".
+				 *
+				 * If we locked it yet arrive here, it's
+				 * likely that lazy sync is in progress and
+				 * so the vnode still has dirty metadata.
+				 * syncdelay is mainly to get this vnode out
+				 * of the way so we do not consider it again
+				 * "soon" in this loop, so the delay time is
+				 * not critical as long as it is not "soon".
+				 * While write-back strategy is the file
+				 * system's domain, we expect write-back to
+				 * occur no later than syncdelay seconds
+				 * into the future.
 				 */
-				vn_syncer_add1(vp, syncdelay);
+				vn_syncer_add1(vp,
+				    synced ? syncdelay : lockdelay);
 			}
 		}
 
@@ -247,8 +300,10 @@ sched_sync(void *v)
 		if (bioopsp != NULL)
 			(*bioopsp->io_sync)(NULL);
 
+		/*
+		 * Wait until there are more workitems to process.
+		 */
 		mutex_exit(&syncer_mutex);
-
 		mutex_enter(&syncer_data_lock);
 		if (rushjob > 0) {
 			/*
diff --git a/sys/miscfs/syncfs/sync_vnops.c b/sys/miscfs/syncfs/sync_vnops.c
index b392fc9fcf37..8e1a8ae461f2 100644
--- a/sys/miscfs/syncfs/sync_vnops.c
+++ b/sys/miscfs/syncfs/sync_vnops.c
@@ -1,4 +1,33 @@
-/* $NetBSD: sync_vnops.c,v 1.25 2008/05/06 18:43:44 ad Exp $ */
+/* $NetBSD: sync_vnops.c,v 1.26 2009/02/22 20:10:25 ad Exp $ */
+
+/*-
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /*
  * Copyright 1997 Marshall Kirk McKusick. All Rights Reserved.
@@ -32,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sync_vnops.c,v 1.25 2008/05/06 18:43:44 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sync_vnops.c,v 1.26 2009/02/22 20:10:25 ad Exp $");
 
 #include
 #include
@@ -62,6 +91,18 @@ const struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
 const struct vnodeopv_desc sync_vnodeop_opv_desc =
 	{ &sync_vnodeop_p, sync_vnodeop_entries };
 
+/*
+ * Return delay factor appropriate for the given file system. For
+ * WAPBL we use the sync vnode to burst out metadata updates: sync
+ * those file systems more frequently.
+ */
+static inline int
+sync_delay(struct mount *mp)
+{
+
+	return mp->mnt_wapbl != NULL ? metadelay : syncdelay;
+}
+
 /*
  * Create a new filesystem syncer vnode for the specified mount point.
  */
@@ -70,8 +111,8 @@ vfs_allocate_syncvnode(mp)
 	struct mount *mp;
 {
 	struct vnode *vp;
-	static long start, incr, next;
-	int error;
+	static int start, incr, next;
+	int error, vdelay;
 
 	/* Allocate a new vnode */
 	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0)
@@ -98,7 +139,8 @@ vfs_allocate_syncvnode(mp)
 		next = start;
 	}
 	mutex_enter(&vp->v_interlock);
-	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
+	vdelay = sync_delay(mp);
+	vn_syncer_add_to_worklist(vp, vdelay > 0 ? next % vdelay : 0);
 	mutex_exit(&vp->v_interlock);
 	mp->mnt_syncer = vp;
 	return (0);
@@ -149,7 +191,7 @@ sync_fsync(v)
 	 * Move ourselves to the back of the sync list.
	 */
 	mutex_enter(&syncvp->v_interlock);
-	vn_syncer_add_to_worklist(syncvp, syncdelay);
+	vn_syncer_add_to_worklist(syncvp, sync_delay(mp));
 	mutex_exit(&syncvp->v_interlock);
 
 	/*
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index e21be92b0967..d2701293fc8d 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $NetBSD: ffs_alloc.c,v 1.120 2009/01/11 02:45:56 christos Exp $ */
+/* $NetBSD: ffs_alloc.c,v 1.121 2009/02/22 20:10:25 ad Exp $ */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.120 2009/01/11 02:45:56 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.121 2009/02/22 20:10:25 ad Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -1284,7 +1284,7 @@ retry:
 	if (ibp != NULL &&
 	    initediblk != ufs_rw32(cgp->cg_initediblk, needswap)) {
 		/* Another thread allocated more inodes so we retry the test. */
-		brelse(ibp, BC_INVAL);
+		brelse(ibp, 0);
 		ibp = NULL;
 	}
 	/*
@@ -1396,7 +1396,7 @@ gotit:
 	if (bp != NULL)
 		brelse(bp, 0);
 	if (ibp != NULL)
-		brelse(ibp, BC_INVAL);
+		brelse(ibp, 0);
 	mutex_enter(&ump->um_lock);
 	return (0);
 }
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index f2b2e55f29e4..be128a78b031 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/* $NetBSD: ffs_vfsops.c,v 1.241 2008/11/13 11:09:45 ad Exp $ */
+/* $NetBSD: ffs_vfsops.c,v 1.242 2009/02/22 20:10:25 ad Exp $ */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.241 2008/11/13 11:09:45 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.242 2009/02/22 20:10:25 ad Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -111,6 +111,8 @@ __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.241 2008/11/13 11:09:45 ad Exp $");
 
 MODULE(MODULE_CLASS_VFS, ffs, NULL);
 
+static int ffs_vfs_fsync(vnode_t *, int);
+
 static struct sysctllog *ffs_sysctl_log;
 
 /* how many times ffs_init() was called */
@@ -151,7 +153,7 @@ struct vfsops ffs_vfsops = {
 	ffs_suspendctl,
 	genfs_renamelock_enter,
 	genfs_renamelock_exit,
-	ffs_full_fsync,
+	ffs_vfs_fsync,
 	ffs_vnodeopv_descs,
 	0,
 	{ NULL, NULL },
@@ -1697,11 +1699,22 @@ loop:
 			continue;
 		mutex_enter(&vp->v_interlock);
 		ip = VTOI(vp);
-		/* XXXpooka: why wapbl check? */
+
+		/*
+		 * We deliberately update inode times here. This will
+		 * prevent a massive queue of updates accumulating, only
+		 * to be handled by a call to unmount.
+		 *
+		 * XXX It would be better to have the syncer trickle these
+		 * out. Adjustment needed to allow registering vnodes for
+		 * sync when the vnode is clean, but the inode dirty. Or
+		 * have ufs itself trickle out inode updates.
+		 */
 		if (ip == NULL || (vp->v_iflag &
 		    (VI_XLOCK | VI_CLEAN)) != 0 ||
 		    vp->v_type == VNON || ((ip->i_flag &
-		    (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
-		    (LIST_EMPTY(&vp->v_dirtyblkhd) || (mp->mnt_wapbl)) &&
+		    (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
+		    IN_MODIFIED | IN_ACCESSED)) == 0 &&
+		    LIST_EMPTY(&vp->v_dirtyblkhd) &&
 		    UVM_OBJ_IS_CLEAN(&vp->v_uobj))) {
 			mutex_exit(&vp->v_interlock);
@@ -2138,3 +2151,152 @@ ffs_suspendctl(struct mount *mp, int cmd)
 		return EINVAL;
 	}
 }
+
+/*
+ * Synch vnode for a mounted file system. This is called for foreign
+ * vnodes, i.e. non-ffs.
+ */
+static int
+ffs_vfs_fsync(vnode_t *vp, int flags)
+{
+	int error, passes, skipmeta, i, pflags;
+	buf_t *bp, *nbp;
+	struct mount *mp;
+
+	KASSERT(vp->v_type == VBLK);
+	KASSERT(vp->v_specmountpoint != NULL);
+
+	mp = vp->v_specmountpoint;
+	if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
+		softdep_fsync_mountdev(vp);
+
+	/*
+	 * Flush all dirty data associated with the vnode.
+	 */
+	pflags = PGO_ALLPAGES | PGO_CLEANIT;
+	if ((flags & FSYNC_WAIT) != 0)
+		pflags |= PGO_SYNCIO;
+	mutex_enter(&vp->v_interlock);
+	error = VOP_PUTPAGES(vp, 0, 0, pflags);
+	if (error)
+		return error;
+
+#ifdef WAPBL
+	if (mp && mp->mnt_wapbl) {
+		/*
+		 * Don't bother writing out metadata if the syncer is
+		 * making the request. We will let the sync vnode
+		 * write it out in a single burst through a call to
+		 * VFS_SYNC().
+		 */
+		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
+			return 0;
+
+		/*
+		 * Don't flush the log if the vnode being flushed
+		 * contains no dirty buffers that could be in the log.
+		 */
+		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
+			error = wapbl_flush(mp->mnt_wapbl, 0);
+			if (error)
+				return error;
+		}
+
+		if ((flags & FSYNC_WAIT) != 0) {
+			mutex_enter(&vp->v_interlock);
+			while (vp->v_numoutput)
+				cv_wait(&vp->v_cv, &vp->v_interlock);
+			mutex_exit(&vp->v_interlock);
+		}
+
+		return 0;
+	}
+#endif /* WAPBL */
+
+	/*
+	 * Write out metadata for non-logging file systems. This block can
+	 * be simplified once softdep goes.
+	 */
+	passes = NIADDR + 1;
+	skipmeta = 0;
+	if (flags & FSYNC_WAIT)
+		skipmeta = 1;
+
+loop:
+	mutex_enter(&bufcache_lock);
+	LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
+		bp->b_cflags &= ~BC_SCANNED;
+	}
+	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+		nbp = LIST_NEXT(bp, b_vnbufs);
+		if (bp->b_cflags & (BC_BUSY | BC_SCANNED))
+			continue;
+		if ((bp->b_oflags & BO_DELWRI) == 0)
+			panic("ffs_fsync: not dirty");
+		if (skipmeta && bp->b_lblkno < 0)
+			continue;
+		bp->b_cflags |= BC_BUSY | BC_VFLUSH | BC_SCANNED;
+		mutex_exit(&bufcache_lock);
+		/*
+		 * On our final pass through, do all I/O synchronously
+		 * so that we can find out if our flush is failing
+		 * because of write errors.
+		 */
+		if (passes > 0 || !(flags & FSYNC_WAIT))
+			(void) bawrite(bp);
+		else if ((error = bwrite(bp)) != 0)
+			return (error);
+		/*
+		 * Since we unlocked during the I/O, we need
+		 * to start from a known point.
+		 */
+		mutex_enter(&bufcache_lock);
+		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
+	}
+	mutex_exit(&bufcache_lock);
+	if (skipmeta) {
+		skipmeta = 0;
+		goto loop;
+	}
+
+	if ((flags & FSYNC_WAIT) != 0) {
+		mutex_enter(&vp->v_interlock);
+		while (vp->v_numoutput) {
+			cv_wait(&vp->v_cv, &vp->v_interlock);
+		}
+		mutex_exit(&vp->v_interlock);
+
+		/*
+		 * Ensure that any filesystem metadata associated
+		 * with the vnode has been written.
+		 */
+		if ((error = softdep_sync_metadata(vp)) != 0)
+			return (error);
+
+		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
+			/*
+			 * Block devices associated with filesystems may
+			 * have new I/O requests posted for them even if
+			 * the vnode is locked, so no amount of trying will
+			 * get them clean. Thus we give block devices a
+			 * good effort, then just give up. For all other file
+			 * types, go around and try again until it is clean.
+			 */
+			if (passes > 0) {
+				passes--;
+				goto loop;
+			}
+#ifdef DIAGNOSTIC
+			if (vp->v_type != VBLK)
+				vprint("ffs_fsync: dirty", vp);
+#endif
+		}
+	}
+
+	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
+		(void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
+		    kauth_cred_get());
+	}
+
+	return error;
+}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 3d944eb946bf..80b0d7ab17bf 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/* $NetBSD: ffs_vnops.c,v 1.109 2009/02/01 17:36:43 ad Exp $ */
+/* $NetBSD: ffs_vnops.c,v 1.110 2009/02/22 20:10:25 ad Exp $ */
 
 /*-
  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.109 2009/02/01 17:36:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ffs_vnops.c,v 1.110 2009/02/22 20:10:25 ad Exp $");
 
 #if defined(_KERNEL_OPT)
 #include "opt_ffs.h"
@@ -319,7 +319,13 @@ ffs_fsync(void *v)
 #ifdef WAPBL
 	mp = wapbl_vptomp(vp);
 	if (mp->mnt_wapbl) {
-		if (ap->a_flags & FSYNC_DATAONLY) {
+		/*
+		 * Don't bother writing out metadata if the syncer is
+		 * making the request. We will let the sync vnode
+		 * write it out in a single burst through a call to
+		 * VFS_SYNC().
+		 */
+		if ((ap->a_flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) {
 			fstrans_done(vp->v_mount);
 			return 0;
 		}
@@ -336,7 +342,7 @@ ffs_fsync(void *v)
 		    (ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
 		UFS_WAPBL_END(mp);
 	}
-	if (error || (ap->a_flags & FSYNC_NOLOG)) {
+	if (error || (ap->a_flags & FSYNC_NOLOG) != 0) {
 		fstrans_done(vp->v_mount);
 		return error;
 	}
@@ -393,43 +399,38 @@ out:
 }
 
 /*
- * Synch an open file. Called for VOP_FSYNC() and VFS_FSYNC().
- *
- * BEWARE: THIS ROUTINE ACCEPTS BOTH FFS AND NON-FFS VNODES.
+ * Synch an open file. Called for VOP_FSYNC().
  */
 /* ARGSUSED */
 int
 ffs_full_fsync(struct vnode *vp, int flags)
 {
 	struct buf *bp, *nbp;
-	int error, passes, skipmeta, inodedeps_only, waitfor;
+	int error, passes, skipmeta, inodedeps_only, waitfor, i;
 	struct mount *mp;
 
+	KASSERT(VTOI(vp) != NULL);
+	KASSERT(vp->v_tag == VT_UFS);
+
 	error = 0;
 
-	if ((flags & FSYNC_VFS) != 0) {
-		KASSERT(vp->v_specmountpoint != NULL);
+	mp = vp->v_mount;
+	if (vp->v_type == VBLK && vp->v_specmountpoint != NULL) {
 		mp = vp->v_specmountpoint;
-		KASSERT(vp->v_type == VBLK);
+		if ((mp->mnt_flag & MNT_SOFTDEP) != 0)
+			softdep_fsync_mountdev(vp);
 	} else {
 		mp = vp->v_mount;
-		KASSERT(vp->v_tag == VT_UFS);
 	}
 
-	if (vp->v_type == VBLK &&
-	    vp->v_specmountpoint != NULL &&
-	    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
-		softdep_fsync_mountdev(vp);
-
 	mutex_enter(&vp->v_interlock);
 
 	inodedeps_only = DOINGSOFTDEP(vp) && (flags & FSYNC_RECLAIM)
 	    && UVM_OBJ_IS_CLEAN(&vp->v_uobj) && LIST_EMPTY(&vp->v_dirtyblkhd);
 
 	/*
-	 * Flush all dirty data associated with a vnode.
+	 * Flush all dirty data associated with the vnode.
 	 */
-
 	if (vp->v_type == VREG || vp->v_type == VBLK) {
 		int pflags = PGO_ALLPAGES | PGO_CLEANIT;
 
@@ -447,21 +448,25 @@ ffs_full_fsync(struct vnode *vp, int flags)
 
 #ifdef WAPBL
 	if (mp && mp->mnt_wapbl) {
-		error = 0;
-		if (flags & FSYNC_DATAONLY)
-			return error;
+		/*
+		 * Don't bother writing out metadata if the syncer is
+		 * making the request. We will let the sync vnode
+		 * write it out in a single burst through a call to
+		 * VFS_SYNC().
+		 */
+		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0)
+			return 0;
 
-		if ((flags & FSYNC_VFS) == 0 && VTOI(vp) != NULL &&
-		    (VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE
+		if ((VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE
 		    | IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) != 0) {
 			error = UFS_WAPBL_BEGIN(mp);
 			if (error)
 				return error;
 			error = ffs_update(vp, NULL, NULL,
-				(flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
+			    (flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);
 			UFS_WAPBL_END(mp);
 		}
-		if (error || (flags & FSYNC_NOLOG))
+		if (error || (flags & FSYNC_NOLOG) != 0)
 			return error;
 
 		/*
@@ -476,7 +481,7 @@ ffs_full_fsync(struct vnode *vp, int flags)
 
 		if ((flags & FSYNC_WAIT) != 0) {
 			mutex_enter(&vp->v_interlock);
-			while (vp->v_numoutput)
+			while (vp->v_numoutput != 0)
 				cv_wait(&vp->v_cv, &vp->v_interlock);
 			mutex_exit(&vp->v_interlock);
 		}
@@ -485,6 +490,10 @@ ffs_full_fsync(struct vnode *vp, int flags)
 	}
 #endif /* WAPBL */
 
+	/*
+	 * Write out metadata for non-logging file systems. This block can
+	 * be simplified once softdep goes.
+	 */
 	passes = NIADDR + 1;
 	skipmeta = 0;
 	if (flags & FSYNC_WAIT)
@@ -565,17 +574,11 @@ loop:
 		waitfor = 0;
 	else
 		waitfor = (flags & FSYNC_WAIT) != 0 ? UPDATE_WAIT : 0;
-
-	if ((flags & FSYNC_VFS) == 0)
-		error = ffs_update(vp, NULL, NULL, waitfor);
+	error = ffs_update(vp, NULL, NULL, waitfor);
 
 	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
-		int i = 0;
-		if ((flags & FSYNC_VFS) == 0) {
-			KASSERT(VTOI(vp) != NULL);
-			vp = VTOI(vp)->i_devvp;
-		}
-		VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE, curlwp->l_cred);
+		(void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE,
+		    kauth_cred_get());
 	}
 
 	return error;